javadoc.py 9.8 KB
Edit Raw Blame History Permalink



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250


#
# javadoc.py: javadoc docstring parsing
# Edward Loper
#
# Created [07/03/03 12:37 PM]
# $Id: javadoc.py 1574 2007-03-07 02:55:14Z dvarrazzo $
#

"""
Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>}
docstrings.  Javadoc is an HTML-based markup language that was
developed for documenting Java APIs with inline comments.  It consists
of raw HTML, augmented by Javadoc tags.  There are two types of
Javadoc tag:

  - X{Javadoc block tags} correspond to Epydoc fields.  They are
    marked by starting a line with a string of the form \"C{@M{tag}
    [M{arg}]}\", where C{M{tag}} indicates the type of block, and
    C{M{arg}} is an optional argument.  (For fields that take
    arguments, Javadoc assumes that the single word immediately
    following the tag is an argument; multi-word arguments cannot be
    used with javadoc.)  
  
  - X{inline Javadoc tags} are used for inline markup.  In particular,
    epydoc uses them for crossreference links between documentation.
    Inline tags may appear anywhere in the text, and have the form
    \"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the
    type of inline markup, and C{M{args}} are optional arguments.

Epydoc supports all Javadoc tags, I{except}:
  - C{{@docRoot}}, which gives the (relative) URL of the generated
    documentation's root.
  - C{{@inheritDoc}}, which copies the documentation of the nearest
    overridden object.  This can be used to combine the documentation
    of the overridden object with the documentation of the
    overridding object.
  - C{@serial}, C{@serialField}, and C{@serialData} which describe the
    serialization (pickling) of an object.
  - C{{@value}}, which copies the value of a constant.

@warning: Epydoc only supports HTML output for Javadoc docstrings.
"""
__docformat__ = 'epytext en'

# Imports
import re
from xml.dom.minidom import *
from epydoc.markup import *

def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using Javadoc; and
    return a C{ParsedDocstring} representation of its contents.
    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    return ParsedJavadocDocstring(docstring, errors)

class ParsedJavadocDocstring(ParsedDocstring):
    """
    An encoded version of a Javadoc docstring.  Since Javadoc is a
    fairly simple markup language, we don't do any processing in
    advance; instead, we wait to split fields or resolve
    crossreference links until we need to.

    @group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
    @cvar _ARG_FIELDS: A list of the fields that take arguments.
        Since Javadoc doesn't mark arguments in any special way, we
        must consult this list to decide whether the first word of a
        field is an argument or not.
    @cvar _FIELD_RE: A regular expression used to search for Javadoc
        block tags.

    @group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
    @cvar _LINK_SPLIT_RE: A regular expression used to search for
        Javadoc inline tags.
    @cvar _LINK_RE: A regular expression used to process Javadoc
        inline tags.
    """
    def __init__(self, docstring, errors=None):
        """
        Create a new C{ParsedJavadocDocstring}.
        
        @param docstring: The docstring that should be used to
            construct this C{ParsedJavadocDocstring}.
        @type docstring: C{string}
        @param errors: A list where any errors generated during
            parsing will be stored.  If no list is given, then
            all errors are ignored.
        @type errors: C{list} of L{ParseError}
        """
        self._docstring = docstring
        if errors is None: errors = []
        self._check_links(errors)

    #////////////////////////////////////////////////////////////
    # Field Splitting
    #////////////////////////////////////////////////////////////

    _ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+
                   'ivar param '+
                   'parameter arg argument raise raises exception '+
                   'except deffield newfield keyword kwarg kwparam').split()
    _FIELD_RE = re.compile(r'(^\s*\@\w+[\s$])', re.MULTILINE)
    
    # Inherit docs from ParsedDocstring.
    def split_fields(self, errors=None):

        # Split the docstring into an alternating list of field tags
        # and text (odd pieces are field tags).
        pieces = self._FIELD_RE.split(self._docstring)

        # The first piece is the description.
        descr = ParsedJavadocDocstring(pieces[0])

        # The remaining pieces are the block fields (alternating tags
        # and bodies; odd pieces are tags).
        fields = []
        for i in range(1, len(pieces)):
            if i%2 == 1:
                # Get the field tag.
                tag = pieces[i].strip()[1:]
            else:
                # Get the field argument (if appropriate).
                if tag in self._ARG_FIELDS:
                    subpieces = pieces[i].strip().split(None, 1)+['','']
                    (arg, body) = subpieces[:2]
                else:
                    (arg, body) = (None, pieces[i])

                # Special processing for @see fields, since Epydoc
                # allows unrestricted text in them, but Javadoc just
                # uses them for xref links:
                if tag == 'see' and body:
                    if body[0] in '"\'':
                        if body[-1] == body[0]: body = body[1:-1]
                    elif body[0] == '<': pass
                    else: body = '{@link %s}' % body

                # Construct the field.
                parsed_body = ParsedJavadocDocstring(body)
                fields.append(Field(tag, arg, parsed_body))

        if pieces[0].strip():
            return (descr, fields)
        else:
            return (None, fields)

    #////////////////////////////////////////////////////////////
    # HTML Output.
    #////////////////////////////////////////////////////////////

    _LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')
    _LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' +
                          r'(?:\([^\)]*\))?' + r'(\s+.*)?' + r'}')

    # Inherit docs from ParsedDocstring.
    def to_html(self, docstring_linker, **options):
        # Split the docstring into an alternating list of HTML and
        # links (odd pieces are links).
        pieces = self._LINK_SPLIT_RE.split(self._docstring)

        # This function is used to translate {@link ...}s to HTML.
        translate_xref = docstring_linker.translate_identifier_xref
        
        # Build up the HTML string from the pieces.  For HTML pieces
        # (even), just add it to html.  For link pieces (odd), use
        # docstring_linker to translate the crossreference link to
        # HTML for us.
        html = ''
        for i in range(len(pieces)):
            if i%2 == 0:
                html += pieces[i]
            else:
                # Decompose the link into pieces.
                m = self._LINK_RE.match(pieces[i])
                if m is None: continue # Error flagged by _check_links
                (target, name) = m.groups()

                # Normalize the target name.
                if target[0] == '#': target = target[1:]
                target = target.replace('#', '.')
                target = re.sub(r'\(.*\)', '', target)

                # Provide a name, if it wasn't specified.
                if name is None: name = target
                else: name = name.strip()

                # Use docstring_linker to convert the name to html.
                html += translate_xref(target, name)
        return html

    def _check_links(self, errors):
        """
        Make sure that all @{link}s are valid.  We need a separate
        method for ths because we want to do this at parse time, not
        html output time.  Any errors found are appended to C{errors}.
        """
        pieces = self._LINK_SPLIT_RE.split(self._docstring)
        linenum = 0
        for i in range(len(pieces)):
            if i%2 == 1 and not self._LINK_RE.match(pieces[i]):
                estr = 'Bad link %r' % pieces[i]
                errors.append(ParseError(estr, linenum, is_fatal=0))
            linenum += pieces[i].count('\n')

    #////////////////////////////////////////////////////////////
    # Plaintext Output.
    #////////////////////////////////////////////////////////////

    # Inherit docs from ParsedDocstring.  Since we don't define
    # to_latex, this is used when generating latex output.
    def to_plaintext(self, docstring_linker, **options):
        return self._docstring

    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    # Jeff's hack to get summary working
    def summary(self):
        # Drop tags
        doc = "\n".join([ row for row in self._docstring.split('\n')
                          if not row.lstrip().startswith('@') ])

        m = self._SUMMARY_RE.match(doc)
        if m:
            other = doc[m.end():]
            return (ParsedJavadocDocstring(m.group(1)),
                    other != '' and not other.isspace())
            
        else:
            parts = doc.strip('\n').split('\n', 1)
            if len(parts) == 1:
                summary = parts[0]
                other = False
            else:
                summary = parts[0] + '...'
                other = True
            
            return ParsedJavadocDocstring(summary), other
        
#     def concatenate(self, other):
#         if not isinstance(other, ParsedJavadocDocstring):
#             raise ValueError, 'Could not concatenate docstrings'
#         return ParsedJavadocDocstring(self._docstring+other._docstring)