import sys, os, string, glob
import re
from docparser.wxclasses import *
import wx

outputdir = "output"

# Flags shared by every documentation regex compiled in this section.
_DOC_RE_FLAGS = re.MULTILINE | re.DOTALL | re.IGNORECASE

# NOTE(review): the patterns below contain significant literal newlines and
# leading spaces, so they are written as raw triple-quoted strings -- do not
# re-indent them.  Several of them (e.g. derived_class_re, args_re) look as if
# markup they were meant to anchor on has been lost; confirm them against the
# documentation format being parsed before relying on their matches.

#
# Class REs
#

class_desc_re = r"""

.*?

(.*?)"""

win_styles_re = r"""Window styles

(.*?)"""

win_styles_extra_re = r"""Extra window styles

(.*?)"""

win_style_re = r"""\s*?\s*?(.*?)\s*?\s*?\s*?(.*?)"""

derived_re = r"""Derived from

(.*?)

"""

derived_class_re = r"""(.*?)"""

#
# Method REs
#

# groups - header, description
method_re = r"""

(.*?)

\s*?

(.*?)


"""

lastmethod_re = r"""

(.*?)

\s*?

(.*?)\s*?

\s*?"""

headings_re = r"""(.*?)

(.*?)"""

# groups = param name, param value
param_re = r"""(.*?)

      (.*?)
"""

# groups - return type, method name, arguments
proto_re = r"""(.*?).*?(.*?)\s*?\((.*?)\)"""

# groups - arg type, arg name
args_re = r"""(.*?).*?(.*?)"""

code_re = r"""
(.*?)
"""

link_re = r"""(.*?)
"""

#
# wxPython/wxPerl note REs
#

wx_re = r"""wx[A-Z]\S+"""

wxperl_overload_re = r"""wxPerl note: In wxPerl there are two methods instead of a single overloaded method:

\s*?

      (.*?)
"""

wxperl_re = r"""wxPerl note:(.*?)

"""

wxpython_constructors_re = r"""wxPython note: Constructors supported by wxPython are:

\s*?

      (.*?)
"""

wxpython_overload_re = r"""\s*?\s*?(.*?)\s*?\s*?\s*?(.*?)"""

wxpython_overloads_re = r"""wxPython note: In place of a single overloaded method name, wxPython\s*?implements the following methods:

\s*?

      (.*?)
"""

wxpython_re = r"""wxPython note:(.*?)

"""


# convert wxWhatever to wx.Whatever
def namespacify_wxClasses(contents):
    """Return *contents* with every ``wx_re`` match passed through wxReplaceFunc."""
    wx_regex = re.compile(wx_re, re.MULTILINE | re.DOTALL)
    return wx_regex.sub(wxReplaceFunc, contents)


def wxReplaceFunc(match):
    """
    Substitution callback for namespacify_wxClasses.

    Rewrites "wx" to "wx." inside the matched text unless the text mentions
    wxWidgets, wxPython or wxPerl, in which case it is returned untouched.
    """
    text = match.group()
    product_names = ("wxWidgets", "wxPython", "wxPerl")
    if not any(product in text for product in product_names):
        text = text.replace("wx", "wx.")
    return text


# Methods to de-C++itize data.

def pythonize_text(contents):
    """
    Remove C++isms that definitely shouldn't be in any text.

    The replacements are applied in order ("non-NULL" must be handled before
    "NULL"), line endings are normalised to "\\n", and the result is finally
    run through namespacify_wxClasses.
    """
    replacements = (
        ("false", "False"),
        ("true", "True"),
        ("non-NULL", "not None"),
        ("NULL", "None"),
        ("const ", ""),
        ("::", "."),
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("''", "\""),
    )
    for cpp_text, python_text in replacements:
        contents = contents.replace(cpp_text, python_text)
    return namespacify_wxClasses(contents)


def pythonize_args(contents):
    """
    Remove C++isms from arguments (some of these terms may be used in other
    contexts in actual documentation, so we don't remove them there).

    Returns the cleaned string after also applying pythonize_text.
    """
    replacements = (
        ("static", ""),
        ("virtual void", ""),
        ("virtual", ""),
        ("void*", "int"),
        ("void", ""),
        ("off_t", "long"),
        ("size_t", "long"),
        ("*", ""),
        ("&", ""),
        # "wxChar" has to be handled before "char": the other way round the
        # "char" rule turns it into "wxstring" and this rule never matches.
        ("wxChar", "string"),
        ("char", "string"),
        ("wxCoord", "int"),
        ("wxString", "string"),
    )
    for cpp_text, python_text in replacements:
        contents = contents.replace(cpp_text, python_text)
    return pythonize_text(contents)


def formatMethodProtos(protos):
    """
    Remove C++isms in the method prototypes.

    *protos* is a list of [return type, method name, args] lists, where args
    is a list of [arg type, arg name] lists.  The lists are modified in place
    and *protos* is returned.
    """
    for proto in protos:
        proto[0] = pythonize_args(proto[0]).strip()
        proto[1] = namespacify_wxClasses(proto[1])
        for arg in proto[2]:
            # strip() returns a new string, so the result must be stored.
            arg[0] = pythonize_args(arg[0]).strip()

            # for arg names, we should be more careful about what we replace
            arg[1] = pythonize_text(arg[1])
            arg[1] = arg[1].replace("*", "")
            arg[1] = arg[1].replace("&", "")

    return protos


# functions for getting data from methods

def getMethodWxPythonOverrides(text, isConstructor=False):
    """
    Extract the wxPython overload note from a method's text.

    Uses wxpython_constructors_re when *isConstructor* is true and
    wxpython_overloads_re otherwise.  Returns (overrides, returntext): the
    list of [name, description] pairs found inside the note, and *text* with
    the note removed (or *text* unchanged when there is no note).
    """
    overloads_re = wxpython_constructors_re if isConstructor else wxpython_overloads_re
    overload_regex = re.compile(overloads_re, _DOC_RE_FLAGS)
    match = overload_regex.search(text, 0)
    if not match:
        return [], text

    def getWxPythonOverridesFromMatch(match):
        return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))]

    overrides, _ = findAllMatches(wxpython_overload_re, match.group(1),
                                  getWxPythonOverridesFromMatch)
    returntext = text.replace(match.group(0), "")
    return overrides, returntext


def getMethodWxPythonNote(text):
    """
    Extract a generic "wxPython note:" from a method's text.

    Returns (note, returntext): the note body ("" when there is none) and
    *text* with the whole note removed.
    """
    python_regex = re.compile(wxpython_re, _DOC_RE_FLAGS)
    match = python_regex.search(text)
    if not match:
        return "", text
    return match.group(1), text.replace(match.group(0), "")


def findAllMatches(re_string, text, handler, start=0):
    """
    findAllMatches finds matches for a given regex, then runs the handler
    function on each match, and returns a list of objects, along with a
    version of the text with the area matches were found stripped.
    Note the stripping of text is not generally usable yet, it assumes
    matches are in continuous blocks, which is true of the wx docs.

    *start* is the position in *text* at which searching begins.
    """
    regex = re.compile(re_string, _DOC_RE_FLAGS)
    results = []
    startpoint = -1
    endpoint = -1

    # finditer() always makes progress, even for patterns that can match the
    # empty string; re-searching from match.end() by hand would loop forever
    # on a zero-width match.
    for match in regex.finditer(text, start):
        if startpoint == -1:
            startpoint = match.start()
        endpoint = match.end()
        results.append(handler(match))

    returntext = text
    if startpoint != -1 and endpoint != -1:
        returntext = text.replace(text[startpoint:endpoint], "")

    return results, returntext


def getMethodParams(text):
    """
    Parse the parameter list that follows the "Parameters" heading.

    Returns (params, returntext) where params is a list of
    [name, description] pairs.  When the heading is absent there are no
    parameters and *text* is returned unchanged.
    """
    paramstart = text.find("Parameters\n\n")
    if paramstart == -1:
        # Without this guard -1 would be used as the search position, which
        # the re module treats as 0, i.e. the whole text would be scanned.
        return [], text
    params, returntext = findAllMatches(param_re, text, getMethodParamsFromMatch, paramstart)
    return params, returntext


def getMethodParamsFromMatch(match):
    """Build a [name, description] pair from a param_re match."""
    return [match.group(1).strip(), pythonize_text(match.group(2)).strip()]


def getPrototypeFromMatch(match):
    """Build a [return type, method name, args] list from a proto_re match."""
    return [match.group(1), match.group(2), getProtoArgs(match.group(3))]


def getProtoArgsFromMatch(match):
    """Build an [arg type, arg name] pair from an args_re match."""
    return [match.group(1), match.group(2)]


# These methods parse the docs, finding matches and then using the FromMatch
# functions to parse the data. After that, the results are "Pythonized"
# by removing C++isms.

def getMethodProtos(text):
    """Return (prototypes, returntext) for the prototypes found in *text*."""
    protos, returntext = findAllMatches(proto_re, text, getPrototypeFromMatch)
    return formatMethodProtos(protos), returntext


def getProtoArgs(text):
    """Return the list of [arg type, arg name] pairs found in *text*."""
    args, _ = findAllMatches(args_re, text, getProtoArgsFromMatch)
    return args


def getMethodDesc(text):
    """
    Return the pythonized description part of a method's text.

    The description is everything before the first occurrence of
    heading_text, or the whole text when the marker does not occur.
    """
    # NOTE(review): the marker literal is empty in this copy of the file --
    # presumably the heading markup it should hold was lost; confirm.
    heading_text = ""
    return_text = text
    # An empty marker is "found" at index 0 by str.find(), which would throw
    # the entire description away, so it is only honoured when non-empty.
    if heading_text:
        end = text.find(heading_text)
        if end != -1:
            return_text = text[0:end]

    return pythonize_text(return_text)


def removeWxPerlNotes(text):
    """Return *text* with wxPerl overload notes and plain wxPerl notes removed."""
    perl_overload_regex = re.compile(wxperl_overload_re, _DOC_RE_FLAGS)
    result = perl_overload_regex.sub("", text)

    perl_regex = re.compile(wxperl_re, _DOC_RE_FLAGS)
    return perl_regex.sub("", result)


def removeCPPCode(text):
    """Return *text* with every code_re match removed."""
    code_regex = re.compile(code_re, _DOC_RE_FLAGS)
    return code_regex.sub("", text)


def getMethod(match, parent):
    """
    Build a wxMethod from a method_re / lastmethod_re match.

    Group 1 of *match* is the method heading and group 2 its body; *parent*
    is the class object the method belongs to (its ``name`` is used to detect
    constructors).  The wxPython overload note, the generic wxPython note and
    the parameter list are peeled off the body in that order; what remains
    becomes the description.
    """
    name = match.group(1)
    if "::" in name:
        name = name.split("::")[1]
    name = namespacify_wxClasses(name).strip()

    protos, remainder = getMethodProtos(match.group(2))

    isConstructor = (name == parent.name.replace("wx", "wx."))
    overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)

    note, remainder = getMethodWxPythonNote(remainder)
    params, remainder = getMethodParams(remainder)
    desc = getMethodDesc(remainder)

    method = wxMethod(name, parent, protos, params, desc)
    method.pythonNote = note
    method.pythonOverrides = overrides
    if method.pythonOverrides:
        # Single-argument call form prints identically on Python 2 and 3.
        print("has overrides!\n\n\n\n")

    return method


def getClassDerivedFrom(text):
    """Return the namespacified base-class names listed under "Derived from"."""
    def getDerivedClassesFromMatch(match):
        return namespacify_wxClasses(match.group(1))

    derived_classes = []
    derived_regex = re.compile(derived_re, _DOC_RE_FLAGS)
    match = derived_regex.search(text)
    if match:
        derived_classes, _ = findAllMatches(derived_class_re, match.group(1),
                                            getDerivedClassesFromMatch)
    return derived_classes


def getClassDescription(text):
    """
    Return the pythonized class description, i.e. the first class_desc_re
    match in *text*, or "" when the pattern does not match at all.
    """
    def getClassDescriptionFromMatch(match):
        return match.group(1)

    desc, _ = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch)
    if not desc:
        # Indexing desc[0] would raise IndexError for a page without a
        # recognisable description.
        return ""
    return pythonize_text(desc[0])


def getClassStyles(text, extraStyles=False):
    """
    Return the window styles documented in *text* as [name, description] pairs.

    With *extraStyles* true the "Extra window styles" section is parsed
    instead of "Window styles".  An empty list is returned when the section
    is missing.
    """
    styles_re = win_styles_extra_re if extraStyles else win_styles_re
    styles_regex = re.compile(styles_re, _DOC_RE_FLAGS)
    match = styles_regex.search(text)

    styles = []
    if match:
        def getClassStyleFromMatch(match):
            return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))]

        styles, _ = findAllMatches(win_style_re, match.group(1), getClassStyleFromMatch)

    return styles


# Main functions - these drive the process.
def _wxClassAttributes(basename):
    """Return the attribute names of the wxPython class ``wx.<basename>``."""
    # getattr() replaces the former eval("dir(wx.%s)"): same lookup (and same
    # AttributeError when the class is missing) without evaluating text that
    # was scraped from the documentation as Python code.
    return dir(getattr(wx, basename))


def _addMethodIfWrapped(match, parent, methods):
    """Parse one method match and store it in *methods* if wxPython exposes it.

    A method is kept when it is the constructor of *parent* or when its name
    is an attribute of the corresponding wx class.
    """
    newmethod = getMethod(match, parent)
    basename = parent.name.replace("wx", "")
    isConstructor = (basename == newmethod.name.replace("wx.", ""))
    if isConstructor or newmethod.name in _wxClassAttributes(basename):
        # Single-argument call form prints identically on Python 2 and 3.
        print("Adding %s.%s" % (parent.name, newmethod.name))
        methods[newmethod.name] = newmethod


def getClassMethods(doc, parent):
    """
    Parse the class documentation file *doc* and return its methods.

    Returns a dict mapping method name to the object built by getMethod().
    As a side effect, every kept "GetXxx" method adds "Xxx" either to
    ``parent.props`` or, when the wx class already has an attribute of that
    name, to ``parent.propConflicts`` (prefixed with the class name).
    """
    # The with-block closes the file even if reading fails.
    with open(doc, "rb") as docfile:
        contents = docfile.read()

    # get rid of some particularly tricky parts before parsing
    contents = contents.replace("const", "")
    contents = removeWxPerlNotes(contents)
    contents = removeCPPCode(contents)

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    methods = {}

    method_regex = re.compile(method_re, flags)
    start = 0
    for match in method_regex.finditer(contents):
        start = match.end()
        _addMethodIfWrapped(match, parent, methods)

    # The final method on a page is terminated differently, so it is picked
    # up with its own pattern, searching from the end of the last match.
    lastmethod_regex = re.compile(lastmethod_re, flags)
    match = lastmethod_regex.search(contents, start)
    if match:
        _addMethodIfWrapped(match, parent, methods)

    getters = [name for name in methods if name[0:3] == "Get"]
    if getters:
        basename = parent.name.replace("wx", "")
        existing_names = _wxClassAttributes(basename)
        for name in getters:
            propname = name[3:]
            if propname not in existing_names:
                parent.props.append(propname)
            else:
                parent.propConflicts.append(parent.name + "." + propname)

    # get rid of the destructor and operator methods
    ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
                      "operator <<", "operator >>", "operator =",
                      "operator !=", "operator*", "operator++"]
    for method in ignore_methods:
        methods.pop(method, None)

    return methods


def getClasses(doc):
    """
    Parse the class index file *doc* and return the documented classes.

    Every link found after the "Alphabetical class reference" heading whose
    name (minus "wx") is an attribute of the ``wx`` module is loaded from the
    linked page, which is resolved relative to the directory of *doc*.
    Returns a dict mapping class name to a wxClass with its ``methods``
    attribute filled in by getClassMethods().
    """
    with open(doc, "rb") as docfile:
        contents = docfile.read()

    link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    # If the heading is missing find() yields -1, which the re module treats
    # as position 0, so the whole page is scanned in that case.
    start = contents.find("\n\nAlphabetical class reference\n\n")

    docdir = os.path.dirname(doc)
    wx_names = dir(wx)  # invariant across links, so computed once
    classes = {}

    # link_re is expected to expose two groups: (1) link target, (2) class name.
    for result in link_regex.finditer(contents, start):
        name = result.group(2).strip()
        classpage = result.group(1).split("#")[0]
        basename = name.replace("wx", "")
        if basename in wx_names:
            classfile = os.path.join(docdir, classpage)
            with open(classfile, "rb") as classdoc:
                classtext = classdoc.read()

            derivedClasses = getClassDerivedFrom(classtext)
            description = getClassDescription(classtext)
            styles = getClassStyles(classtext)
            extra_styles = getClassStyles(classtext, extraStyles=True)

            classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
            classes[name].methods = getClassMethods(classfile, classes[name])

    return classes