aca310e5cc
New Features: In Tab-View mode, Ctrl-number will take the user to the numbered tab view. Modified files now show an '*' astrisk in the view title. Debugger framework can now support PHP debugging. Not important for python development, but at least that means the debugger framework is more generalized. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38852 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
381 lines
14 KiB
Python
381 lines
14 KiB
Python
#----------------------------------------------------------------------------
|
|
# Name: parser.py
|
|
# Purpose: parsing utilities
|
|
#
|
|
# Author: Jeff Norton
|
|
#
|
|
# Created: 8/9/05
|
|
# CVS-ID: $Id$
|
|
# Copyright: (c) 2004-2005 ActiveGrid, Inc.
|
|
# License: wxWindows License
|
|
#----------------------------------------------------------------------------
|
|
|
|
import re
|
|
from activegrid.util.lang import *
|
|
ifDefPy()
|
|
import string
|
|
import array
|
|
endIfDef()
|
|
|
|
# Synthetic variable name substituted by xpathToCode for a leading "/" in an
# XPath, i.e. the root object the generated expression is evaluated against.
XPATH_ROOT_VAR = '__rootObj__'

# Part names of a getObject-style query specification.
# NOTE(review): not referenced in this module -- presumably consumed by callers.
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]
|
|
|
|
class Tokenizer(object):
    """Simple one-token-lookahead tokenizer used by the XPath-to-Python
    translation code in this module.

    Current token is exposed as (token, tokenVal); the lookahead token as
    (nextToken, nextTokenVal).  getNextToken() shifts the lookahead into the
    current slot and scans the next token into the lookahead slot.
    """

    # Token type codes stored in .token / .nextToken.
    TOKEN_IDENT = 1    # identifier (starts with identStart pattern)
    TOKEN_STRING = 2   # quoted string, quotes included in the value
    TOKEN_OP = 3       # any other character (plus two-char ops like "<=")
    TOKEN_WS = 4       # run of whitespace (only when ignoreWhitespace=False)
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
        """
        Turn a string into individual tokens. Three types of tokens are recognized:
            TOKEN_IDENT: identifiers (those that start with the identStart pattern)
            TOKEN_STRING: quoted string
            TOKEN_OP: everything else
        Tokens are separated by white space or the tokenSep pattern.
        Constructor parameters:
            text: The string to tokenize
            identStart: A regular expression describing characters which start an identifier
                        The default expression accepts letters, "_", and "/".
            tokenSep: A regular expression describing the characters which end a token
                      (in addition to whitespace). The default expression accepts
                      anything except alpha-numerics, "_", "/", and ":".
            ignoreWhitespace: When False, runs of whitespace are reported as
                      TOKEN_WS tokens instead of being skipped.
        Usage:
            Invoke getNextToken (or next) to get the next token. The instance variables
            token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
            TOKEN_STRING, or TOKEN_OP) and value respectively. nextToken and nextTokenVal
            will also be available for lookahead. The next method is similar to
            getNextToken but also returns the token value. A value of None signals end
            of stream.
        """
        self.ignoreWhitespace=ignoreWhitespace
        ifDefPy()
        # array.array input is flattened to its raw string form first.
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0
        self.textLen = len(self.text)
        self.token = None
        self.tokenVal = None
        self.nextToken = None
        self.nextTokenVal = None
        if (identStart == None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep == None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken() # Prime the pump

    def isEscaped(text, index):
        # True when text[index] is preceded by a single (non-escaped)
        # backslash.  Only the two preceding characters are inspected, so a
        # run of three or more backslashes is not classified correctly.
        if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
            return True
        return False
    isEscaped = staticmethod(isEscaped)

    def findClosingQuote(text, index, char):
        # Return the index just PAST the closing quote char, skipping
        # backslash-escaped occurrences, or -1 if there is no closer.
        index = index + 1
        while True:
            endIndex = text.find(char, index)
            if (endIndex < 1):
                return -1
            if (Tokenizer.isEscaped(text, endIndex)):
                index = endIndex+1
            else:
                break
        return endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)

    def _findClosing(self, char):
        # Locate the end of the quoted string starting at textIndex; raises
        # if the string is unterminated.
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
        return index

    def next(self):
        # Python 2 iterator protocol: return the next token value, raising
        # StopIteration at end of stream.
        self.getNextToken()
        if (self.token == None):
            raise StopIteration()
        return self.tokenVal

    def getNextToken(self):
        # Shift the lookahead token into the current slot, then scan the
        # following token from text into the lookahead slot.
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    # Quoted string; value keeps the surrounding quotes.
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                elif (self.identStart.search(c)):
                    # Identifier: runs until the first separator character.
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                else:
                    # Operator: a single char, widened to two chars for the
                    # comparison ops (<=, >=, !=, ==) and printf-style
                    # placeholders (%s, %d, ...).
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        c = self.text[endIndex]
                        if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
            elif not self.ignoreWhitespace:
                # Collect the whole whitespace run as one TOKEN_WS token.
                self.nextToken=self.TOKEN_WS
                self.nextTokenVal=""
                while c in string.whitespace:
                    self.nextTokenVal+=c
                    self.textIndex+=1
                    if self.textIndex==len(self.text):
                        break
                    c=self.text[self.textIndex]
                return
            # Whitespace being skipped (ignoreWhitespace=True).
            self.textIndex += 1
        # End of input: lookahead becomes None to signal end of stream.
        self.nextToken = None
        self.nextTokenVal = None
|
|
|
|
def isXPathNonVar(var):
    """Return True iff var is a literal rather than a variable reference.

    A literal is a quoted string ("foo" or 'foo'), one of the keywords /
    function names recognized by xpathToCode below, or an integer literal.
    Anything else (including float literals) returns False.
    """
    # Quoted string literal (either quote style).
    if (var.startswith("'") and var.endswith("'")) or \
       (var.startswith('"') and var.endswith('"')):
        return True

    # Keyword / function-name list from xpathToCode, below.
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or",
                       "like", "not"]:
        return True

    # Integer literal.  int() raises ValueError for non-numeric strings and
    # TypeError for non-string input; both simply mean "not a literal".
    # (Py2-only "except X, e" syntax replaced with the form valid in both
    # Python 2 and 3; the unused bindings were dropped.)
    try:
        int(var)
        return True
    except (TypeError, ValueError):
        pass

    return False
|
|
|
|
def xpathToCode(xpaths, convertBracket=True):
    """Translate one XPath predicate (or a list of them) into a Python
    expression string; multiple predicates are joined with " and ".

    Translation rules: "=" becomes "==", "[" / "]" become "(" / ")" (unless
    convertBracket is False), "count" becomes "len", "empty" becomes
    "ctx.isEmptyPath", true()/false()/null() become True/False/None, a
    leading "/" path is rewritten as an attribute access on XPATH_ROOT_VAR,
    "like '...%'" becomes a .startswith() call, and "ns:func(...)" tokens
    become runtimesupport service-invocation wrappers (arguments translated
    recursively).
    """
    if ((xpaths == None) or (len(xpaths) < 1)):
        # No predicate at all is vacuously true.
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        # Identifiers may contain digits, ".", ":" and "/" so whole paths and
        # namespaced names come back as single tokens.  Whitespace tokens are
        # kept so spacing survives into the generated expression.
        t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken=None
        while t.nextToken != None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                if (t.tokenVal == "="):
                    # XPath equality is a single "="; Python needs "==".
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for like:
                    # typically like queries look like this: "foo like 'blah%'".
                    # So translate this into "foo.startswith(blah)".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken() # throw away the "like" token
                        last = len(expr) - 1
                        # Rewrite the preceding operand in place:
                        # foo like 'blah%'  ->  foo.startswith('blah')
                        expr[last] = "%s.startswith(%s')"\
                                     % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)

                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif (-1!=t.tokenVal.find(':')):
                    # Namespaced name, e.g. "ns:func(...)": a service call.
                    serviceDef, args=_parseServiceFunction(t)

                    # XXX handle serviceDef, args being None

                    # Each argument is itself an XPath expression; translate
                    # recursively (brackets kept literal inside arguments).
                    for i in range(len(args)):
                        args[i]=xpathToCode(args[i], False)
                    jargs="[%s]" % (",".join(args))

                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0]=='dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                                    (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                                    (serviceDef, jargs))
                else:
                    # Plain identifier / path token.
                    # NOTE(review): lastToken is only ever assigned inside the
                    # branch below, so the wasFunc test can only fire after a
                    # previous "/..." token ending in ")" or "]" -- looks like
                    # it was meant to track every token; confirm before changing.
                    if (lastToken==')' or lastToken==']'):
                        wasFunc=True
                    else:
                        wasFunc=False
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        # Absolute path: root it at the synthetic root object
                        # and turn path separators into attribute access.
                        expr.append(XPATH_ROOT_VAR)
                        expr.append(t.tokenVal.replace('/','.'))
                        lastToken=t.tokenVal
                    else:
                        expr.append(t.tokenVal)


        # Assemble this predicate; multi-token expressions get parenthesized
        # so the final " and " join binds correctly.
        if (len(expr) == 2 and expr[0]==" "):
            expr = "".join(expr)
            result.append(expr)
        elif (len(expr) > 1):
            expr = "".join(expr)
            result.append("(%s)" % expr)
        elif (len(expr) > 0):
            result.append(expr[0])

    return " and ".join(result)
|
|
|
|
def _parseArgs(t):
|
|
args=[]
|
|
argcon=""
|
|
|
|
if t.tokenVal!='(':
|
|
return []
|
|
if t.nextTokenVal==')':
|
|
t.getNextToken()
|
|
return []
|
|
|
|
depth=1
|
|
|
|
while(depth!=0):
|
|
if not t.nextToken:
|
|
raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
|
|
t.getNextToken()
|
|
|
|
if t.tokenVal=='(':
|
|
depth+=1
|
|
if t.tokenVal==')':
|
|
depth-=1
|
|
|
|
if depth==0 or (depth==1 and t.tokenVal==','):
|
|
args.append(argcon)
|
|
argcon=""
|
|
else:
|
|
argcon+=t.tokenVal
|
|
return args
|
|
|
|
def _parseServiceFunction(t):
|
|
"""Parses what appears to be a service function call into serviceDefs and args lists.
|
|
|
|
Returns None, None if the serviceFunction appears to be invalid.
|
|
"""
|
|
if t.nextTokenVal!='(':
|
|
return t.tokenVal, None
|
|
|
|
serviceDef=t.tokenVal.split(':')
|
|
t.getNextToken()
|
|
args=_parseArgs(t)
|
|
|
|
return serviceDef, args
|
|
|
|
def _parseConstantFunction(t, outputValue):
|
|
firstVal = t.tokenVal
|
|
if t.nextTokenVal != '(':
|
|
return firstVal
|
|
t.getNextToken()
|
|
if t.nextTokenVal != ')':
|
|
return "%s%s" % (firstVal, '(')
|
|
t.getNextToken()
|
|
return outputValue
|
|
|
|
def parseDSPredicate(ctx, str, vars, valueList=None):
    """Inline variable references in a data-service predicate string.

    Scans str for "bpws:getVariableData(ref)" and "${ref}" occurrences,
    evaluates each referenced path against vars (a dict/ObjAsDict evaluated
    via xpathToCode, or an object evaluated via ctx.evalPath), and replaces
    the occurrence either with the value itself (when the reference was
    quoted in the source) or with a '%s' placeholder whose value is appended
    to valueList.  Returns (qualifications, valueList) where qualifications
    is a one-element list holding the rewritten predicate string.

    NOTE(review): the parameter name "str" shadows the builtin; kept as-is to
    preserve the public signature for keyword callers.
    """
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict

    if valueList == None:
        valueList = []
    indexVar=0
    oldIndexVar=0
    sourceStr=str
    inlinedPredicate=[]     # alternating literal-text / substitution pieces
    qualifications=[]
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        # Prefer the BPEL form; fall back to the ${...} shorthand.
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break
            dollarCurlForm = True
        # A reference directly preceded by a quote is inlined literally
        # rather than turned into a '%s' placeholder.
        if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
            quoted = True
        if not dollarCurlForm:
            # bpws:getVariableData( ref )
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            # ${ ref }
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        # Strip one layer of surrounding quotes from the reference, if any.
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        # Emit the literal text preceding the reference, then the
        # substitution for the reference itself.
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            inlinedPredicate.append("%s" % value)
        else:
            inlinedPredicate.append('%s')
            valueList.append(value)
        indexVar = closeParen+1
    # Trailing literal text after the last reference (or the whole string
    # when no reference was found).
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList
|