wxWidgets/wxPython/samples/ide/activegrid/util/parser.py
Robin Dunn aca310e5cc DocView and ActiveGrid IDE updates from Morgan Hua:
New Features: In Tab-View mode, Ctrl-number will take the user to
    the numbered tab view.  Modified files now show an '*' asterisk in
    the view title.  Debugger framework can now support PHP debugging.
    Not important for python development, but at least that means the
    debugger framework is more generalized.


git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38852 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2006-04-20 06:26:03 +00:00

381 lines
14 KiB
Python

#----------------------------------------------------------------------------
# Name: parser.py
# Purpose: parsing utilities
#
# Author: Jeff Norton
#
# Created: 8/9/05
# CVS-ID: $Id$
# Copyright: (c) 2004-2005 ActiveGrid, Inc.
# License: wxWindows License
#----------------------------------------------------------------------------
import re
from activegrid.util.lang import *
ifDefPy()
import string
import array
endIfDef()
# Name of the implicit variable that anchors absolute paths when an XPath-like
# expression is compiled to Python code (see xpathToCode below).
XPATH_ROOT_VAR = '__rootObj__'
# Part names of a "getObject" request.
# NOTE(review): not referenced elsewhere in this file -- presumably consumed by
# callers of this module; confirm before changing.
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]
class Tokenizer(object):
    # Token type codes held in self.token / self.nextToken.
    TOKEN_IDENT = 1    # identifier (starts with the identStart pattern)
    TOKEN_STRING = 2   # quoted string; the value keeps its quote characters
    TOKEN_OP = 3       # anything else (operators, punctuation)
    TOKEN_WS = 4       # run of whitespace (only produced when ignoreWhitespace is False)
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
        """
        Turn a string into individual tokens. Three types of tokens are recognized:
        TOKEN_IDENT: identifiers (those that start with the identStart pattern)
        TOKEN_STRING: quoted string
        TOKEN_OP: everything else
        Tokens are separated by white space or the tokenSep pattern.
        Constructor parameters:
        text: The string to tokenize
        identStart: A regular expression describing characters which start an identifier
                    The default expression accepts letters, "_", and "/".
        tokenSep: A regular expression describing the characters which end a token
                  (in addition to whitespace). The default expression accepts
                  anything except alpha-numerics, "_", "/", and ":".
        Usage:
        Invoke getNextToken (or next) to get the next token. The instance variables
        token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
        TOKEN_STRING, or TOKEN_OP) and value respectively. nextToken and nextTokenVal
        will also be available for lookahead. The next method is similar to
        getNextToken but also returns the token value. A value of None signals end
        of stream.
        """
        self.ignoreWhitespace=ignoreWhitespace
        ifDefPy()
        # Accept array.array input by flattening it to a plain string first.
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0          # scan position within self.text
        self.textLen = len(self.text)
        self.token = None           # current token type
        self.tokenVal = None        # current token value
        self.nextToken = None       # one-token lookahead type
        self.nextTokenVal = None    # one-token lookahead value
        if (identStart == None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep == None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken() # Prime the pump

    def isEscaped(text, index):
        # True when text[index] is preceded by exactly one backslash
        # (i.e. the backslash itself is not escaped by another backslash).
        if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
            return True
        return False
    isEscaped = staticmethod(isEscaped)

    def findClosingQuote(text, index, char):
        # Find the quote `char` that closes the one at `index`, skipping
        # backslash-escaped occurrences.  Returns the index just PAST the
        # closing quote, or -1 if no unescaped closing quote exists.
        index = index + 1
        while True:
            endIndex = text.find(char, index)
            if (endIndex < 1):
                return -1
            if (Tokenizer.isEscaped(text, endIndex)):
                # Escaped quote: keep searching after it.
                index = endIndex+1
            else:
                break
        return endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)

    def _findClosing(self, char):
        # Locate the end of the quoted string opening at self.textIndex,
        # raising if the string is never closed.
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
        return index

    def next(self):
        # Python 2 iterator protocol: advance and return the current token
        # value, signalling end of stream with StopIteration.
        self.getNextToken()
        if (self.token == None):
            raise StopIteration()
        return self.tokenVal

    def getNextToken(self):
        # Shift the lookahead into the current slot, then scan the text for
        # the token after it, filling nextToken/nextTokenVal.
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    # Quoted string token (quotes kept in the value).
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                elif (self.identStart.search(c)):
                    # Identifier: runs until the first separator character.
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                else:
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        # Two-character comparison operators: <=, >=, !=, ==.
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        # Printf-style placeholder such as %s or %%.
                        c = self.text[endIndex]
                        if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
            elif not self.ignoreWhitespace:
                # Emit the whole whitespace run as a single TOKEN_WS token.
                self.nextToken=self.TOKEN_WS
                self.nextTokenVal=""
                while c in string.whitespace:
                    self.nextTokenVal+=c
                    self.textIndex+=1
                    if self.textIndex==len(self.text):
                        break
                    c=self.text[self.textIndex]
                return
            # ignoreWhitespace: silently skip this whitespace character.
            self.textIndex += 1
        # End of text: None signals end of stream to callers.
        self.nextToken = None
        self.nextTokenVal = None
def isXPathNonVar(var):
    """Return True iff var is a literal rather than a variable reference.

    A literal is a quoted string ("foo" or 'foo'), one of the reserved
    XPath keywords handled by xpathToCode, or an integer literal.
    """
    if (var.startswith("'") and var.endswith("'")) or \
       (var.startswith('"') and var.endswith('"')):
        return True
    # list from xpathToCode, below
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or",
                       "like", "not"]:
        return True
    # Integer literal?  (int() raises ValueError for non-numeric strings and
    # TypeError for non-string input; either way it is not a literal number.)
    # Fix: use the version-portable tuple form instead of the Python 2-only
    # "except TypeError, e" syntax, and drop the unused bindings.
    try:
        int(var)
        return True
    except (TypeError, ValueError):
        pass
    return False
def xpathToCode(xpaths, convertBracket=True):
    """Translate one or more XPath-like predicate strings into a Python
    expression string.

    xpaths may be a single string or a list/tuple; multiple expressions are
    joined with " and ".  Returns the string "True" for None/empty input.
    When convertBracket is true, "[" and "]" are rewritten to "(" and ")".
    """
    if ((xpaths == None) or (len(xpaths) < 1)):
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        # Keep whitespace tokens so the generated code preserves spacing.
        t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken=None
        while t.nextToken != None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                # XPath "=" becomes Python "=="; brackets optionally become parens.
                if (t.tokenVal == "="):
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for like:
                    # typically like queries look like this: "foo like 'blah%'".
                    # So translate this into "foo.startswith(blah)".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken() # throw away the "like" token
                        last = len(expr) - 1
                        expr[last] = "%s.startswith(%s')"\
                            % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)
                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif (-1!=t.tokenVal.find(':')):
                    # An identifier containing ':' is a service function call.
                    serviceDef, args=_parseServiceFunction(t)
                    # XXX handle serviceDef, args being None
                    for i in range(len(args)):
                        args[i]=xpathToCode(args[i], False)
                    jargs="[%s]" % (",".join(args))
                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0]=='dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                            (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                            (serviceDef, jargs))
                else:
                    # Plain identifier or path reference.
                    if (lastToken==')' or lastToken==']'):
                        wasFunc=True
                    else:
                        wasFunc=False
                    # NOTE(review): lastToken is only assigned in the branch
                    # below, so the wasFunc test above may never see ')' or
                    # ']' -- confirm against the original indentation.
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        # Absolute path: anchor at the implicit root object
                        # and turn path separators into attribute access.
                        expr.append(XPATH_ROOT_VAR)
                        expr.append(t.tokenVal.replace('/','.'))
                        lastToken=t.tokenVal
                    else:
                        expr.append(t.tokenVal)
        # Collapse the token list into one expression string; parenthesize
        # multi-token expressions so the " and " join stays unambiguous.
        if (len(expr) == 2 and expr[0]==" "):
            expr = "".join(expr)
            result.append(expr)
        elif (len(expr) > 1):
            expr = "".join(expr)
            result.append("(%s)" % expr)
        elif (len(expr) > 0):
            result.append(expr[0])
    return " and ".join(result)
def _parseArgs(t):
args=[]
argcon=""
if t.tokenVal!='(':
return []
if t.nextTokenVal==')':
t.getNextToken()
return []
depth=1
while(depth!=0):
if not t.nextToken:
raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
t.getNextToken()
if t.tokenVal=='(':
depth+=1
if t.tokenVal==')':
depth-=1
if depth==0 or (depth==1 and t.tokenVal==','):
args.append(argcon)
argcon=""
else:
argcon+=t.tokenVal
return args
def _parseServiceFunction(t):
    """Parses what appears to be a service function call into serviceDefs and args lists.
    Returns None, None if the serviceFunction appears to be invalid.

    NOTE(review): when no "(" follows, the code actually returns
    (t.tokenVal, None) rather than (None, None) -- behavior kept as-is.
    """
    # Not followed by an argument list: not a call at all.
    if t.nextTokenVal != '(':
        return t.tokenVal, None
    # "prefix:name" becomes the service definition parts.
    parts = t.tokenVal.split(':')
    t.getNextToken()
    return parts, _parseArgs(t)
def _parseConstantFunction(t, outputValue):
firstVal = t.tokenVal
if t.nextTokenVal != '(':
return firstVal
t.getNextToken()
if t.nextTokenVal != ')':
return "%s%s" % (firstVal, '(')
t.getNextToken()
return outputValue
def parseDSPredicate(ctx, str, vars, valueList=None):
    """Inline variable references in a data-service predicate string.

    Scans `str` for "bpws:getVariableData(<ref>)" and "${<ref>}" references.
    Each <ref> is evaluated against `vars`: when vars is a dict or ObjAsDict
    the ref is compiled with xpathToCode and evaluated with evalCode,
    otherwise ctx.evalPath(vars, ref) is used.  A reference preceded by a
    quote character is replaced inline with its value; otherwise it is
    replaced with a '%s' placeholder and the value is appended to valueList.

    Returns (qualifications, valueList) where qualifications is a one-element
    list holding the rewritten predicate string.
    NOTE(review): the parameter name `str` shadows the builtin; kept for
    interface compatibility.
    """
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict
    if valueList == None:
        valueList = []
    indexVar=0           # scan position / start of the current reference
    oldIndexVar=0        # end of the previous reference (start of literal text)
    sourceStr=str
    inlinedPredicate=[]  # output pieces, joined at the end
    qualifications=[]
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        # Find the next reference in either syntax.
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break  # no more references; emit the tail below
            dollarCurlForm = True
        # A reference immediately preceded by a quote gets its value inlined.
        if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
            quoted = True
        if not dollarCurlForm:
            # bpws:getVariableData( <ref> )
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            # ${ <ref> }
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        # Strip surrounding quotes from the reference itself, if present.
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        # Literal text preceding the reference, then the substitution.
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            inlinedPredicate.append("%s" % value)
        else:
            # NOTE(review): indentation reconstructed -- the value is recorded
            # in valueList only for the placeholder (unquoted) case, keeping
            # placeholders and values in one-to-one correspondence.
            inlinedPredicate.append('%s')
            valueList.append(value)
        indexVar = closeParen+1
    # Remaining literal text after the last reference.
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList