wxWidgets/wxPython/samples/ide/activegrid/util/parser.py
Robin Dunn aca310e5cc DocView and ActiveGrid IDE updates from Morgan Hua:
New Features: In Tab-View mode, Ctrl-number will take the user to
    the numbered tab view.  Modified files now show an '*' asterisk in
    the view title.  Debugger framework can now support PHP debugging.
    Not important for python development, but at least that means the
    debugger framework is more generalized.


git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38852 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2006-04-20 06:26:03 +00:00

381 lines
14 KiB
Python

#----------------------------------------------------------------------------
# Name: parser.py
# Purpose: parsing utilities
#
# Author: Jeff Norton
#
# Created: 8/9/05
# CVS-ID: $Id$
# Copyright: (c) 2004-2005 ActiveGrid, Inc.
# License: wxWindows License
#----------------------------------------------------------------------------
import re
from activegrid.util.lang import *
ifDefPy()
import string
import array
endIfDef()
# Name of the implicit variable that anchors absolute paths when an XPath-like
# expression is compiled to Python code (see xpathToCode below).
XPATH_ROOT_VAR = '__rootObj__'
# Part names of a "getObject" request.
# NOTE(review): not referenced elsewhere in this file -- presumably consumed by
# callers of this module; confirm before changing.
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]
class Tokenizer(object):
    # Token type codes held in self.token / self.nextToken.
    TOKEN_IDENT = 1    # identifier (starts with the identStart pattern)
    TOKEN_STRING = 2   # quoted string; the value keeps its quote characters
    TOKEN_OP = 3       # anything else (operators, punctuation)
    TOKEN_WS = 4       # run of whitespace (only produced when ignoreWhitespace is False)
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
        """
        Turn a string into individual tokens. Three types of tokens are recognized:
        TOKEN_IDENT: identifiers (those that start with the identStart pattern)
        TOKEN_STRING: quoted string
        TOKEN_OP: everything else
        Tokens are separated by white space or the tokenSep pattern.
        Constructor parameters:
        text: The string to tokenize
        identStart: A regular expression describing characters which start an identifier
                    The default expression accepts letters, "_", and "/".
        tokenSep: A regular expression describing the characters which end a token
                  (in addition to whitespace). The default expression accepts
                  anything except alpha-numerics, "_", "/", and ":".
        Usage:
        Invoke getNextToken (or next) to get the next token. The instance variables
        token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
        TOKEN_STRING, or TOKEN_OP) and value respectively. nextToken and nextTokenVal
        will also be available for lookahead. The next method is similar to
        getNextToken but also returns the token value. A value of None signals end
        of stream.
        """
        self.ignoreWhitespace=ignoreWhitespace
        ifDefPy()
        # Accept array.array input by flattening it to a plain string first.
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0          # scan position within self.text
        self.textLen = len(self.text)
        self.token = None           # current token type
        self.tokenVal = None        # current token value
        self.nextToken = None       # one-token lookahead type
        self.nextTokenVal = None    # one-token lookahead value
        if (identStart == None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep == None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken() # Prime the pump

    def isEscaped(text, index):
        # True when text[index] is preceded by exactly one backslash
        # (i.e. the backslash itself is not escaped by another backslash).
        if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
            return True
        return False
    isEscaped = staticmethod(isEscaped)

    def findClosingQuote(text, index, char):
        # Find the quote `char` that closes the one at `index`, skipping
        # backslash-escaped occurrences.  Returns the index just PAST the
        # closing quote, or -1 if no unescaped closing quote exists.
        index = index + 1
        while True:
            endIndex = text.find(char, index)
            if (endIndex < 1):
                return -1
            if (Tokenizer.isEscaped(text, endIndex)):
                # Escaped quote: keep searching after it.
                index = endIndex+1
            else:
                break
        return endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)

    def _findClosing(self, char):
        # Locate the end of the quoted string opening at self.textIndex,
        # raising if the string is never closed.
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
        return index

    def next(self):
        # Python 2 iterator protocol: advance and return the current token
        # value, signalling end of stream with StopIteration.
        self.getNextToken()
        if (self.token == None):
            raise StopIteration()
        return self.tokenVal

    def getNextToken(self):
        # Shift the lookahead into the current slot, then scan the text for
        # the token after it, filling nextToken/nextTokenVal.
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    # Quoted string token (quotes kept in the value).
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                elif (self.identStart.search(c)):
                    # Identifier: runs until the first separator character.
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                else:
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        # Two-character comparison operators: <=, >=, !=, ==.
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        # Printf-style placeholder such as %s or %%.
                        c = self.text[endIndex]
                        if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
            elif not self.ignoreWhitespace:
                # Emit the whole whitespace run as a single TOKEN_WS token.
                self.nextToken=self.TOKEN_WS
                self.nextTokenVal=""
                while c in string.whitespace:
                    self.nextTokenVal+=c
                    self.textIndex+=1
                    if self.textIndex==len(self.text):
                        break
                    c=self.text[self.textIndex]
                return
            # ignoreWhitespace: silently skip this whitespace character.
            self.textIndex += 1
        # End of text: None signals end of stream to callers.
        self.nextToken = None
        self.nextTokenVal = None
def isXPathNonVar(var):
    """Return True iff var is a literal rather than a variable reference.

    A literal is a quoted string ("foo" or 'foo'), one of the reserved
    XPath keywords handled by xpathToCode, or an integer literal.
    """
    if (var.startswith("'") and var.endswith("'")) or \
       (var.startswith('"') and var.endswith('"')):
        return True
    # list from xpathToCode, below
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or",
                       "like", "not"]:
        return True
    # Integer literal?  (int() raises ValueError for non-numeric strings and
    # TypeError for non-string input; either way it is not a literal number.)
    # Fix: use the version-portable tuple form instead of the Python 2-only
    # "except TypeError, e" syntax, and drop the unused bindings.
    try:
        int(var)
        return True
    except (TypeError, ValueError):
        pass
    return False
def xpathToCode(xpaths, convertBracket=True):
    """Translate one or more XPath-like predicate strings into a Python
    expression string.

    xpaths may be a single string or a list/tuple; multiple expressions are
    joined with " and ".  Returns the string "True" for None/empty input.
    When convertBracket is true, "[" and "]" are rewritten to "(" and ")".
    """
    if ((xpaths == None) or (len(xpaths) < 1)):
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        # Keep whitespace tokens so the generated code preserves spacing.
        t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken=None
        while t.nextToken != None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                # XPath "=" becomes Python "=="; brackets optionally become parens.
                if (t.tokenVal == "="):
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for like:
                    # typically like queries look like this: "foo like 'blah%'".
                    # So translate this into "foo.startswith(blah)".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken() # throw away the "like" token
                        last = len(expr) - 1
                        expr[last] = "%s.startswith(%s')"\
                            % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)
                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif (-1!=t.tokenVal.find(':')):
                    # An identifier containing ':' is a service function call.
                    serviceDef, args=_parseServiceFunction(t)
                    # XXX handle serviceDef, args being None
                    for i in range(len(args)):
                        args[i]=xpathToCode(args[i], False)
                    jargs="[%s]" % (",".join(args))
                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0]=='dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                            (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                            (serviceDef, jargs))
                else:
                    # Plain identifier or path reference.
                    if (lastToken==')' or lastToken==']'):
                        wasFunc=True
                    else:
                        wasFunc=False
                    # NOTE(review): lastToken is only assigned in the branch
                    # below, so the wasFunc test above may never see ')' or
                    # ']' -- confirm against the original indentation.
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        # Absolute path: anchor at the implicit root object
                        # and turn path separators into attribute access.
                        expr.append(XPATH_ROOT_VAR)
                        expr.append(t.tokenVal.replace('/','.'))
                        lastToken=t.tokenVal
                    else:
                        expr.append(t.tokenVal)
        # Collapse the token list into one expression string; parenthesize
        # multi-token expressions so the " and " join stays unambiguous.
        if (len(expr) == 2 and expr[0]==" "):
            expr = "".join(expr)
            result.append(expr)
        elif (len(expr) > 1):
            expr = "".join(expr)
            result.append("(%s)" % expr)
        elif (len(expr) > 0):
            result.append(expr[0])
    return " and ".join(result)
def _parseArgs(t):
args=[]
argcon=""
if t.tokenVal!='(':
return []
if t.nextTokenVal==')':
t.getNextToken()
return []
depth=1
while(depth!=0):
if not t.nextToken:
raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
t.getNextToken()
if t.tokenVal=='(':
depth+=1
if t.tokenVal==')':
depth-=1
if depth==0 or (depth==1 and t.tokenVal==','):
args.append(argcon)
argcon=""
else:
argcon+=t.tokenVal
return args
def _parseServiceFunction(t):
    """Parses what appears to be a service function call into serviceDefs and args lists.
    Returns None, None if the serviceFunction appears to be invalid.

    NOTE(review): when no "(" follows, the code actually returns
    (t.tokenVal, None) rather than (None, None) -- behavior kept as-is.
    """
    # Not followed by an argument list: not a call at all.
    if t.nextTokenVal != '(':
        return t.tokenVal, None
    # "prefix:name" becomes the service definition parts.
    parts = t.tokenVal.split(':')
    t.getNextToken()
    return parts, _parseArgs(t)
def _parseConstantFunction(t, outputValue):
firstVal = t.tokenVal
if t.nextTokenVal != '(':
return firstVal
t.getNextToken()
if t.nextTokenVal != ')':
return "%s%s" % (firstVal, '(')
t.getNextToken()
return outputValue
def parseDSPredicate(ctx, str, vars, valueList=None):
    """Inline variable references in a data-service predicate string.

    Scans `str` for "bpws:getVariableData(<ref>)" and "${<ref>}" references.
    Each <ref> is evaluated against `vars`: when vars is a dict or ObjAsDict
    the ref is compiled with xpathToCode and evaluated with evalCode,
    otherwise ctx.evalPath(vars, ref) is used.  A reference preceded by a
    quote character is replaced inline with its value; otherwise it is
    replaced with a '%s' placeholder and the value is appended to valueList.

    Returns (qualifications, valueList) where qualifications is a one-element
    list holding the rewritten predicate string.
    NOTE(review): the parameter name `str` shadows the builtin; kept for
    interface compatibility.
    """
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict
    if valueList == None:
        valueList = []
    indexVar=0           # scan position / start of the current reference
    oldIndexVar=0        # end of the previous reference (start of literal text)
    sourceStr=str
    inlinedPredicate=[]  # output pieces, joined at the end
    qualifications=[]
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        # Find the next reference in either syntax.
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break  # no more references; emit the tail below
            dollarCurlForm = True
        # A reference immediately preceded by a quote gets its value inlined.
        if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
            quoted = True
        if not dollarCurlForm:
            # bpws:getVariableData( <ref> )
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            # ${ <ref> }
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        # Strip surrounding quotes from the reference itself, if present.
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        # Literal text preceding the reference, then the substitution.
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            inlinedPredicate.append("%s" % value)
        else:
            # NOTE(review): indentation reconstructed -- the value is recorded
            # in valueList only for the placeholder (unquoted) case, keeping
            # placeholders and values in one-to-one correspondence.
            inlinedPredicate.append('%s')
            valueList.append(value)
        indexVar = closeParen+1
    # Remaining literal text after the last reference.
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList