root/trunk/cheesecake/codeparser.py

Revision 150, 7.4 kB (checked in by mk, 8 years ago)

Merging mk branch into the trunk.

  • Property svn:executable set to *
Line 
1 import doctest
2 import os
3 import re
4
5 import logger
6 from model import System, Module, Class, Function, parseFile, processModuleAst
7
8
# Python 2.3/2.4 compatibility shim: bind get_doctests to whichever
# doctest example extractor the running interpreter offers.
if not hasattr(doctest, 'DocTestParser'):
    # Python 2.3 only has the private _extract_examples function.
    get_doctests = doctest._extract_examples
else:
    # Python 2.4 has the DocTestParser class.
    get_doctests = doctest.DocTestParser().get_examples
16
17
def compile_regex(pattern, user_map=None):
    """Compile a verbose regex after expanding placeholder names in it.

    Every occurrence of a placeholder name (``ALPHA``, ``WORD``, ``START``,
    ``END`` by default) in `pattern` is replaced by its regex fragment.
    Entries of `user_map` override or extend the defaults.  All placeholders
    are expanded in a single pass, so the result does not depend on
    dictionary ordering and replacement text is never expanded again.

    :Parameters:
      `pattern` : str
          Regex source, interpreted in ``re.VERBOSE`` mode.
      `user_map` : dict
          Optional mapping of placeholder name to regex fragment.

    Return the compiled regular expression object.
    """

    # Handy regular expressions.
    mapping = {'ALPHA': r'[-.,?!\w]', 'WORD': r'[-.,?!\s\w]',
               'START': r'(^|\s)', 'END': r'([.,?!\s]|$)'}

    if user_map:
        mapping.update(user_map)

    # Try longer names first so a placeholder that is a prefix of another
    # one cannot shadow it inside the alternation.
    by_length = [(len(name), name) for name in mapping]
    by_length.sort()
    by_length.reverse()
    placeholder = re.compile(
        '|'.join([re.escape(name) for _, name in by_length]))

    def expand(match):
        # A callable replacement is inserted literally, so backslashes in
        # the fragments (``\w``, ``\s``) survive untouched.
        return mapping[match.group(0)]

    pattern = placeholder.sub(expand, pattern)

    flags = re.VERBOSE
    # LOCALE is only meaningful for byte strings; Python 3.6+ raises
    # ValueError when it is combined with a text (unicode) pattern.
    # ``''.encode()`` yields the byte-string type on every Python version
    # (``str`` on 2.x, ``bytes`` on 3.x).
    if isinstance(pattern, type(''.encode('ascii'))):
        flags = flags | re.LOCALE

    return re.compile(pattern, flags)
38
def inline_markup(start, end=None, mapping=None):
    """Build a regex for text enclosed between two markup delimiters.

    `start` and `end` are regex fragments for the opening and closing
    delimiter; when `end` is omitted the opening delimiter is reused.
    The resulting pattern accepts either a single ALPHA character between
    the delimiters, or an ALPHA, any number of WORD characters and a final
    ALPHA.  `mapping` is handed to compile_regex as the user mapping.
    """
    # No explicit closing delimiter means the markup is symmetric.
    if end is None:
        end = start

    template = r'''(START  %(start)s  ALPHA  %(end)s  END) |
           (START  %(start)s  ALPHA  WORD*  ALPHA  %(end)s  END)'''
    delimiters = {'start': start, 'end': end}
    return compile_regex(template % delimiters, mapping)
45
def line_markup(start, end=None):
    """Build a regex for markup that opens at the beginning of a line.

    Delegates to inline_markup with a mapping in which START matches a
    newline or the start of the text followed by optional spaces/tabs,
    ALPHA additionally accepts whitespace, and END is empty (nothing is
    required after the closing delimiter).
    """
    line_mapping = {
        'ALPHA': r'[-.,?!\s\w]',
        'START': r'(\n|^)[\ \t]*',
        'END': r'',
    }
    return inline_markup(start, end, mapping=line_mapping)
50
# Maps a documentation format name to a list of compiled patterns; a text
# is considered to use the format when any one of them matches.
supported_formats = {
    # reST reference: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html
    'reST': [
        inline_markup(r'\*'), # emphasis
        inline_markup(r'\*\*'), # strong
        inline_markup(r'``'), # inline
        # NOTE(review): the two hyperlink patterns below require literal
        # parentheses around the reference ("(word_)").  reST references
        # are normally written without them -- confirm whether the
        # escaped parentheses were meant to be plain regex groups.
        inline_markup(r'\(', r'_\)', # hyperlink
                      {'ALPHA': r'\w', 'WORD': r'[-.\w]'}),
        inline_markup(r'\(`', r'`_\)'), # long hyperlink
        line_markup(r':'), # field
        line_markup(r'[*+-]', r''), # unordered list
        # The enumerator alternatives are grouped so that the trailing
        # "." or ")" is required after digits as well as after letters.
        line_markup(r'((\d+) | ([a-zA-Z]+)) [.\)]', r''), # ordered list
        line_markup(r'\(  ((\d+)  |  ([a-zA-Z]+))  \)', r''), # ordered list
    ],

    # epytext reference: http://epydoc.sourceforge.net/epytext.html
    'epytext': [
        re.compile(r'[BCEGILMSUX]\{.*\}'), # inline elements
        line_markup(r'@[a-z]+([\ \t][a-zA-Z]+)?:', r''), # fields
        line_markup(r'-', r''), # unordered list
        line_markup(r'\d+(\.\d+)*', r''), # ordered list
    ],

    # javadoc reference: http://java.sun.com/j2se/1.4.2/docs/tooldocs/solaris/javadoc.html
    'javadoc': [
        # Tag names are letters only; the range is "A-Z", not "A-z", which
        # would also admit "[", "\", "]", "^", "_" and "`".
        re.compile(r'<[a-zA-Z]+[^>]*>'), # HTML elements
        line_markup(r'@[a-z][a-zA-Z]*\s', r''), # normal tags
        re.compile(r'{@  ((docRoot) | (inheritDoc) | (link) | (linkplain) |'\
                    ' (value))  [^}]*  }', re.VERBOSE), # special tags
    ],
}
82
83
def use_format(text, format):
    """Tell whether `text` shows signs of the given documentation format.

    Return True as soon as one of the patterns registered for `format`
    in supported_formats is found in `text`, and False when none is.
    """
    for matcher in supported_formats[format]:
        if matcher.search(text) is not None:
            break
    else:
        # Loop ran to completion: no pattern matched.
        return False

    return True
95
96
class CodeParser(object):
    """Information about the structure of a Python module.

    * Collects modules, classes, methods, functions and associated docstrings
    * Based on mwh's docextractor.model module
    """
    def __init__(self, pyfile, log=None):
        """Initialize Code Parser object.

        :Parameters:
          `pyfile` : str
              Path to a Python module to parse.
          `log` : logger.Producer instance
              Logger to use during code parsing.

        If the file cannot be parsed the error is logged and all
        collections and counters keep their initial empty values.
        """
        # Local import: only needed for the version-neutral exception
        # handling below.
        import sys

        if log:
            self.log = log.codeparser
        else:
            self.log = logger.default.codeparser
        self.modules = []
        self.classes = []
        self.methods = []
        self.method_func = []
        self.functions = []
        self.docstrings = [] # objects that have docstrings
        self.docstrings_by_format = {}
        self.formatted_docstrings_count = 0
        self.doctests_count = 0
        self.unittests_count = 0

        # Initialize lists of format docstrings.
        for format in supported_formats:
            self.docstrings_by_format[format] = []

        (path, filename) = os.path.split(pyfile)
        (module, ext) = os.path.splitext(filename)
        self.log("Inspecting file: " + pyfile)

        self.system = System()
        try:
            processModuleAst(parseFile(pyfile), module, self.system)
        except Exception:
            # sys.exc_info() is used instead of "except Exception, e"
            # because this spelling is valid on every Python version.
            e = sys.exc_info()[1]
            self.log("Code parsing error occured:\n***\n%s\n***" % str(e))
            return

        for obj in self.system.orderedallobjects:
            self._record_object(obj)

        self._split_methods_and_functions()

        self.log("modules: " + ",".join(self.modules))
        self.log("classes: " + ",".join(self.classes))
        self.log("methods: " + ",".join(self.methods))
        self.log("functions: " + ",".join(self.functions))
        self.log("docstrings: %s" % self.docstrings_by_format)
        self.log("number of doctests: %d" % self.doctests_count)

    def _record_object(self, obj):
        """File one parsed object under the collections it belongs to."""
        fullname = obj.fullName()
        if isinstance(obj, Module):
            self.modules.append(fullname)
        if isinstance(obj, Class):
            if 'unittest.TestCase' in obj.bases or 'TestCase' in obj.bases:
                self.unittests_count += 1
            self.classes.append(fullname)
        if isinstance(obj, Function):
            self.method_func.append(fullname)
        # Only non-blank string docstrings count.
        if isinstance(obj.docstring, str) and obj.docstring.strip():
            self._record_docstring(fullname, obj.docstring)

    def _record_docstring(self, fullname, docstring):
        """Update the docstring statistics for one documented object."""
        self.docstrings.append(fullname)

        # Check docstring for known documentation formats.  An object is
        # listed under every format it matches but counted only once.
        formatted = False
        for format in supported_formats:
            if use_format(docstring, format):
                self.docstrings_by_format[format].append(fullname)
                formatted = True
        if formatted:
            self.formatted_docstrings_count += 1

        # Check if docstring includes any doctests.
        if get_doctests(docstring):
            self.doctests_count += 1

    def _split_methods_and_functions(self):
        """Sort the names in method_func into methods and functions.

        A name is a method when it lives below one of the collected
        classes, otherwise it is a function.
        """
        for method_or_func in self.method_func:
            for cls in self.classes:
                # Compare against "Class." rather than "Class" so that a
                # function such as "mod.Foobar" is not mistaken for a
                # method of class "mod.Foo".
                # NOTE(review): assumes fullName() is dot-separated --
                # confirm against the model module.
                if method_or_func.startswith(cls + '.'):
                    self.methods.append(method_or_func)
                    break
            else:
                # No enclosing class found.
                self.functions.append(method_or_func)

    def object_count(self):
        """Return number of objects found in this module.

        Objects include:
        * module
        * classes
        * methods
        * functions
        """
        module_count = len(self.modules)
        cls_count = len(self.classes)
        method_count = len(self.methods)
        func_count = len(self.functions)
        return module_count + cls_count + method_count + func_count

    def docstring_count(self):
        """Return number of docstrings found in this module.
        """
        return len(self.docstrings)

    def docstring_count_by_type(self, type):
        """Return number of docstrings of given type found in this module.

        `type` must be one of the keys of supported_formats.
        """
        return len(self.docstrings_by_format[type])

    def _functions_called(self):
        """Return list of functions called by functions/methods
        defined in this module.
        """
        # list() keeps the documented return type even where keys()
        # yields a view instead of a list.
        return list(self.system.func_called.keys())

    functions_called = property(_functions_called)
Note: See TracBrowser for help on using the browser.