#!/usr/bin/python3.1

import codecs, encodings, os, re, sys, string, subprocess, json, pickle

## ---------------------------------------------------------------------
## Module-wide default settings.
## NOTE(review): each default* value below is presumably the fallback for
## a like-named invocation option handled elsewhere in this file --
## confirm against the option-handling code.
## ---------------------------------------------------------------------
defaultEncoding = 'UTF-8'  ## DO NOT CHANGE THIS.  Expat can't handle anything else that's useful.

##############################
## EDIT DEFAULTS BELOW HERE ##
##############################
defaultInputEncoding = defaultEncoding  ## default input encoding (same object as defaultEncoding).
defaultOutputEncoding = defaultEncoding  ## default output encoding (same object as defaultEncoding).

defaultCollapseContainers = True  ## If True, this makes the parsed information *shorter* and *simpler*,
                                  ## so it should generally be True.

defaultAddImplicitSemes = True  ## If True, implicit semes will be reported explicitly.
                              ## Please leave this True so people don't
                              ## write a different tool that doesn't work
                              ## the same way to accomplish this same
                              ## (essential!) task.

defaultValidateXml = True ## If True, the XML instances created by this parser will
                           ## be checked for conformance to the DTD.  Please leave this True
                           ## so we can tell when something breaks.

defaultTokenDisp = False   ## If True, "tokenDisp" attributes will appear on most elements.
                           ## The values of these attributes are large and numerous, and
                           ## they are useful only for human inspection of the XML.

defaultFirstLast = True    ## If True, "first" and "last" attributes will be provided
                           ## on most elements, whose values are the numeric positions of
                           ## the first and last characters of the Star construct that corresponds
                           ## to the element.  Many tools will need this information, and there's little
                           ## harm in providing it.  It probably should be True.

defaultIncludeDtd = False  ## If True, the XML output will include the DTD so that the output
                           ## can be parsed by a validating XML parser, and as a way of allowing the
                           ## XML information to be somewhat self-describing.  Few tools will need this.
                           ## Probably should be False.

defaultPrettyXml = False   ## If True, the XML output will be "pretty-printed", i.e., formatted for
                           ## human inspection.  Generally should be False, because True means the
                           ## XML will have lots of whitespace in it that applications will ignore anyway.

defaultShowHelperCommand = False   ## If True, and if the helper program (starprettyvalid.py) is invoked
                                   ## (it will be invoked if either -validateXML or -prettyXml is True)
                                   ## the command that invokes the helper will be output to the
                                   ## -internalErrors stream.  Useful for parser/helper debugging.  Should
                                   ## normally be False.

defaultBinary = True       ## If True, outputs will include 'binary' attributes (their format is
                           ## described in the "binary attribute" comment of the DTD text in this file).

defaultTokenTweakDebug = False     ## can be set True using the -debug 'True' invocation option.
                                   ## Error paths in this file call pdb.set_trace() when a flag named
                                   ## tokenTweakDebug is true; NOTE(review): that flag is presumably
                                   ## initialized from this default -- confirm.

## Default destinations of the various output streams.
## LEGEND:   '.' suppress output altogether
##           '-' output to stdout
##          '--' output to stderr
## NOTE(review): only these three codes are documented here; whether other
## values (e.g. file names) are accepted must be checked where the streams are opened.

defaultStarErrorsStream = '--'         ## Normally output errors to stderr.
defaultXmlErrorsStream = '--'          ## Normally output errors to stderr.
defaultInternalErrorsStream = '--'      ## Normally output errors to stderr.

defaultJsonStream = '.'           ## Normally suppress this output.

defaultPickleStream = '.'           ## Normally suppress this output.

defaultXmlStream = '-'            ## Normally output XML to stdout.
defaultXmlAsParsedStream = '.'    ## Normally don't output the version used to validate.

defaultTokensStream = '.'            ## Normally suppress this output.
defaultAllTokensStream = '.'         ## Normally suppress this output.
##############################
## EDIT DEFAULTS ABOVE HERE ##
##############################


## The three DTD streams below are outside the user-editable section on purpose.
defaultDtdWithPEsStream = '.'     ## This must be left as it is: normally suppress this output.
defaultDtdStream = '.'            ## This must be left as it is: normally suppress this output.
defaultDtdOnlyForHtmlStream = '.' ## This must be left as it is: normally suppress this output.

layerCount = 7 ## the number of layers, where Layer 0 is the lowest
               ## (most primitive) one, and ( layerCount - 1) is the
               ## highest one.  Import-time loops in this file iterate
               ## range( layerCount) to build per-layer names and tables.


#######################################################
starErrorsStreamFO = sys.stderr  ## temporary: gives import-time code below a stream to report to;
                                 ## NOTE(review): presumably rebound once the real error stream is opened -- confirm.
#######################################################

#######################################################
## Pattern for an expanded CVS date keyword string.  Odd-numbered groups are
## the literal delimiters, even-numbered groups are the numeric fields.
## NOTE(review): the keyword name is spelled '[D][a]te', presumably so that CVS
## keyword expansion never rewrites this pattern itself -- keep it that way.
cvsDateRE = re.compile( '(^\\$[D][a]te: )([0-9]+)(-)([0-9]+)(-)([0-9]+)( )([0-9]+)(:)([0-9]+)(:)([0-9]+)( \\$$)')
def cvsDateParse( cvsDate):
    """
    Split an expanded CVS date keyword string into its numeric fields.

    Returns a list of the six digit strings captured by cvsDateRE, in
    the order they appear (for a value such as '2010-07-01 10:52:44'
    that is year, month, day, hour, minute, second).  The strings are
    not converted to int.

    Raises AttributeError if cvsDate does not match cvsDateRE.
    """
    numericGroups = ( 2, 4, 6, 8, 10, 12)  ## the groups in between are delimiters
    return list( cvsDateRE.match( cvsDate).group( *numericGroups))
    
## CVS keyword strings.  NOTE(review): these are presumably rewritten by CVS
## keyword expansion at check-in, so they must keep the exact shape that
## cvsRevStringRE and cvsDateRE expect; do not edit them by hand.
cvsRevision = "$Revision: 1.122 $"
cvsDate = "$Date: 2010-07-01 10:52:44 $"
cvsDateVector = cvsDateParse( cvsDate)  ## list of six digit strings taken from cvsDate

LICENSE_NOTICE = """This software is licensed under the Apache License, Version
2.0 (the "License"); you may not use this file except in
conformance with the License.  Copies of the License are
available at http://www.apache.org/licenses/LICENSE-2.0

NOTICE: Unless required by applicable law or agreed to in
writing, software distributed under the License is
distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied.  See the
License for the specific language governing permissions and
limitations under the License.

Copies of the source code and the license are available at
http://starparser.ieml.org

Written by Steven R. Newcomb, Coolheads Consulting, and
           Michel R. Biezunski, Infoloom,
under the direction of Pierre Levy, University of Ottawa.

===========================================================
"""

#######################################################
## Pattern for an expanded CVS revision keyword string; group 4 is the
## number after the dot (the only part used below).
cvsRevStringRE = re.compile( '(^\\$[R][e]vision: )([0-9]+)(\\.)([0-9]+)( \\$$)')
def starparser_cvsRevToSoftwareRev( cvsRevString):
    """
    Translate a CVS revision keyword string into a Starparser software
    revision string of the form 'major.minor'.

    Only the number after the dot of the CVS revision is considered.
    Each software major version begins at a fixed CVS revision, and the
    minor number counts CVS revisions since then.  Revisions up to 53
    belong to version 1 and are reported unshifted ('1.53'); the
    version 2 series is numbered from 2.1 (CVS revision 54).

    Raises AttributeError if cvsRevString does not match cvsRevStringRE.
    """
    cvsMinor = int( cvsRevStringRE.match( cvsRevString).group( 4))

    ## ( lowest CVS revision of the series, major version, amount subtracted),
    ## listed from the newest series to the oldest
    seriesTable = (
        ( 117, 7, 117),
        (  87, 6,  87),
        (  76, 5,  76),
        (  67, 4,  67),
        (  62, 3,  62),
        (  54, 2,  53),
    )
    for lowestCvsRev, majorVersion, subtrahend in seriesTable:
        if cvsMinor >= lowestCvsRev:
            return '%d.%s' % ( majorVersion, cvsMinor - subtrahend)
    return '1.%s' % ( cvsMinor)
    
## Human-readable version string: 'v<software revision> <date>', where the
## date is the second whitespace-separated word of cvsDate.  For example,
## CVS revision 1.122 dated 2010-07-01 yields 'v7.5 2010-07-01'.
STARPARSER_VERSION = 'v%s %s' % (
    starparser_cvsRevToSoftwareRev( cvsRevision),
    cvsDate.split()[1],
)

## Star symbol definitions

## largest character ordinal this program deals with (top of the 16-bit range)
MAXUNICODEVALUE = 0xFFFF

## Layer 0 symbols
star_I = 'I'
star_F = 'F'
star_E = 'E'
star_M = 'M'
star_O = 'O'
star_U = 'U'
star_A = 'A'
star_S = 'S'
star_B = 'B'
star_T = 'T'

## Each Layer 0 symbol stands for a set of primitives.  The six primitives
## stand for themselves; I, F, O and M abbreviate larger sets.  The second
## string of each pair lists the member primitives, one character each.
L0SymbolsToPrimitiveSets = {
    symbol: frozenset( memberPrimitives)
    for symbol, memberPrimitives in (
        ( 'E', 'E'),
        ( 'U', 'U'),
        ( 'A', 'A'),
        ( 'S', 'S'),
        ( 'B', 'B'),
        ( 'T', 'T'),
        ( 'I', 'EUASBT'),
        ( 'F',  'UASBT'),
        ( 'O',  'UA'),
        ( 'M',    'SBT'),
    )
}

## Rank of each primitive in its natural display order: E, U, A, S, B, T
## are numbered 1 through 6.
primitiveOrderDict = dict( zip( 'EUASBT', range( 1, 7)))
def primitiveOrder( primStr):
    """
    Sort key for primitives: return the natural-order rank (1-6) of the
    one-character primitive primStr.  Meant to be passed as key= to
    sorted().  Raises KeyError for anything that is not a primitive.
    """
    return primitiveOrderDict[ primStr]

## Layer 1 symbols
star_wo = 'wo'
star_wa = 'wa'
star_wu = 'wu'
star_we = 'we'
star_y = 'y'
star_o = 'o'
star_e = 'e'
star_u = 'u'
star_a = 'a'
star_i = 'i'
star_j = 'j'
star_g = 'g'
star_h = 'h'
star_c = 'c'
star_p = 'p'
star_x = 'x'
star_s = 's'
star_b = 'b'
star_t = 't'
star_k = 'k'
star_m = 'm'
star_n = 'n'
star_d = 'd'
star_f = 'f'
star_l = 'l'

## Each Layer 1 symbol expands to a triple of single-primitive sets.  Only
## the first two members vary; the third is always the set containing 'E'.
## The two-letter strings below give the first and second primitive.
L1SymbolsToPrimitiveSets = {
    symbol: (
        frozenset( ( firstPrimitive,)),
        frozenset( ( secondPrimitive,)),
        frozenset( ( 'E',)),
    )
    for symbol, ( firstPrimitive, secondPrimitive) in (
        ( 'wo', 'UU'),
        ( 'wa', 'UA'),
        ( 'wu', 'AU'),
        ( 'we', 'AA'),
        ( 'y',  'US'),
        ( 'o',  'UB'),
        ( 'e',  'UT'),
        ( 'u',  'AS'),
        ( 'a',  'AB'),
        ( 'i',  'AT'),
        ( 'j',  'SU'),
        ( 'g',  'SA'),
        ( 'h',  'BU'),
        ( 'c',  'BA'),
        ( 'p',  'TU'),
        ( 'x',  'TA'),
        ( 's',  'SS'),
        ( 'b',  'SB'),
        ( 't',  'ST'),
        ( 'k',  'BS'),
        ( 'm',  'BB'),
        ( 'n',  'BT'),
        ( 'd',  'TS'),
        ( 'f',  'TB'),
        ( 'l',  'TT'),
    )
}

## Star syntactic features
## Each two-item list holds a pair of delimiter strings.
## NOTE(review): presumably [ opening delimiter, closing delimiter] in every
## case, as stated for star_parameterIdentifier -- confirm where they are used.
star_expression = [ '*', '**']
star_group = [ '(', ')']
star_undeterminedSubsetOf = [ '<', '>']
star_comment = [ '$?', '?$']
star_categorysep = '/'
star_instantiator = [ '[', ']']
star_diagonal = [ '{', '}']
star_parameterIdentifier = [ '"', '"']  ## (optional) start and end of a parameter identifier string.

## Four characters, all regarded as whitespace: space, horizontal tab (octal 011),
## line feed (octal 012) and carriage return (octal 015).
star_whitespace = ' \011\012\015'

## infix operators used in nongenerative operations
star_primitive_union = '|'
star_primitive_intersection = '&'
star_primitive_difference = '^'
star_sequence_union = '+'
star_sequence_intersection = '@'
star_sequence_difference = '#'

## operator symbol -> name of the operation it denotes
operatorSymbolToOperationName = dict( (
    ( star_primitive_union,        'primitiveUnion'),
    ( star_primitive_intersection, 'primitiveIntersection'),
    ( star_primitive_difference,   'primitiveDifference'),
    ( star_sequence_union,         'sequenceUnion'),
    ( star_sequence_intersection,  'sequenceIntersection'),
    ( star_sequence_difference,    'sequenceDifference'),
))

## operator symbol -> rank (1-6) used when operators must be put in a fixed order
operatorOrderDict = dict( zip(
    (
        star_primitive_union,
        star_primitive_intersection,
        star_primitive_difference,
        star_sequence_union,
        star_sequence_intersection,
        star_sequence_difference,
    ),
    range( 1, 7),
))
def operatorOrder( operatorStr):
    """
    Sort key for operator symbols: return the rank (1-6) of operatorStr.
    Meant to be passed as key= to sorted().  Raises KeyError if
    operatorStr is not one of the six operator symbols.
    """
    return operatorOrderDict[ operatorStr]



## layermarks
star_fillWithCompletenessLayerMark = '~'
star_fillWithPrecedingSemeLayerMark = '!'
star_L0LayerMark = ':'
star_L1LayerMark = '.'
star_L2LayerMark = '-'
star_L3LayerMark = "'"
star_L4LayerMark = ','
star_L5LayerMark = '_'
star_L6LayerMark = ';'

## layermark -> layer number.  The position of a numbered mark in the tuple
## below is its layer number; the two "fill" marks carry no layer number of
## their own and therefore map to None.
layerMarkToLayerNumber = dict(
    ( layerMark, layerIndex)
    for layerIndex, layerMark in enumerate( (
        star_L0LayerMark,
        star_L1LayerMark,
        star_L2LayerMark,
        star_L3LayerMark,
        star_L4LayerMark,
        star_L5LayerMark,
        star_L6LayerMark,
    ))
)
layerMarkToLayerNumber[ star_fillWithCompletenessLayerMark] = None
layerMarkToLayerNumber[ star_fillWithPrecedingSemeLayerMark] = None


## Token type names.  Do not change their order, unless
## you really want to invalidate the vital regular expressions
## that depend on their order.
##
## The list is assembled in three parts:
##   1. the '...AtLx' names ( 'primitiveUnionAtLx' ... 'genOpAtLx'),
##   2. for every meta token type, one '...AtL<n>' name per layer,
##   3. the token types that do not belong to a layer.
tokenTypeNames = [
    baseName + 'AtLx'
    for baseName in (
        'primitiveUnion',
        'primitiveDifference',
        'primitiveIntersection',
        'sequenceUnion',
        'sequenceDifference',
        'sequenceIntersection',

        'group',
        'undeterminedSubsetOf',
        'diagonal',
        'categoryExpression',

        'genOp',  ## 'genOpAtLx' never actually happens, but having it makes the code easier to write
    )
]

## NOTE: this sequence deliberately starts with 'genOp', unlike the one above.
for metaTokenType in (
    'genOp',
    'primitiveUnion',
    'primitiveDifference',
    'primitiveIntersection',
    'sequenceUnion',
    'sequenceDifference',
    'sequenceIntersection',
    'group',
    'undeterminedSubsetOf',
    'diagonal',
    'categoryExpression',
):
    for layerNumber in range( layerCount):
        tokenTypeNames.append( '%sAtL%d' % ( metaTokenType, layerNumber))

tokenTypeNames += [
    'whitespace',
    'comment',
    'instantiator',
    'instantiatorContent',

    'categorySeparator',

    'usl',

    'iemlExpression',  ## This root token type is temporary; it is only reported if parsing is unsuccessful.
                       ## If parsing succeeds, the root token is replaced by whatever the root Star construct
                       ## turns out to be.
]

## Give every token type name its own single character, so that a sequence of
## tokens can be written as a string and examined with regular expressions.
## The characters used are, in order of assignment: the ASCII digits, the
## ASCII upper-case letters, the ASCII lower-case letters, and then the
## characters whose ordinals are 128 and above.  All other ordinals below 128
## are skipped, so no assigned character is special inside a regular
## expression character class.
##
##   tokenTypeNameToRegExpableChar   token type name -> its character
##   regExpableCharToTokenTypeName   the inverse mapping
##   numberOfNonChrTokenTypes        ordinal of the character assigned last
##   ordinalOfLeastChar              numberOfNonChrTokenTypes + 1
regExpableCharToTokenTypeName = {}
tokenTypeNameToRegExpableChar = {}
numberOfNonChrTokenTypes = 0
for tokenTypeName in tokenTypeNames:
    numberOfNonChrTokenTypes += 1
    ## advance past every ordinal below 128 that is not an ASCII letter or digit
    while numberOfNonChrTokenTypes < 128 and not chr( numberOfNonChrTokenTypes).isalnum():
        numberOfNonChrTokenTypes += 1
    if tokenTypeName in tokenTypeNameToRegExpableChar:
        ## This loop runs while the module is still being loaded.  Nothing
        ## above this point defines an output helper or the tokenTweakDebug
        ## flag, so report directly on the error stream and consult the
        ## default debug setting; using names bound only further down the
        ## file would raise NameError here and hide the real problem.
        starErrorsStreamFO.write(
            'internal error: duplicate token type name "%s"\n' % ( tokenTypeName),
        )
        if defaultTokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)
    tokenTypeNameToRegExpableChar[ tokenTypeName] = chr( numberOfNonChrTokenTypes)
    regExpableCharToTokenTypeName[ chr( numberOfNonChrTokenTypes)] = tokenTypeName
    ordinalOfLeastChar = numberOfNonChrTokenTypes + 1

## The module namespace (at module level locals() is the same dictionary as
## globals()).  NOTE(review): nothing in this section needs these two names;
## they are kept because code elsewhere in the file may refer to them.
globalsDict = globals()
localsDict = locals()

## allTokenTypesAtLayerRE[ n] is a compiled regular expression over strings of
## token-type characters (see tokenTypeNameToRegExpableChar).  It matches a
## string consisting of one or more repetitions of:
##     any number of whitespace/comment/instantiator characters,
##     exactly one character of a token type at layer n,
##     any number of whitespace/comment/instantiator characters.
allTokenTypesAtLayerRE = []


for layerNumber in range( layerCount):
    allTokenTypesAtLayerRE.append(
        re.compile( '^(([%s]*)([%s])([%s]*))+$' % (
            ''.join( [
                tokenTypeNameToRegExpableChar[ ignoredName]
                for ignoredName in (
                    'whitespace',    ## irrelevant to layer determination
                    'comment',       ## irrelevant to layer determination
                    'instantiator',  ## irrelevant to layer determination
                )
            ]),

            ''.join( [
                tokenTypeNameToRegExpableChar[ '%sAtL%d' % ( metaName, layerNumber)]
                for metaName in (
                    'primitiveUnion',
                    'primitiveDifference',
                    'primitiveIntersection',
                    'sequenceUnion',
                    'sequenceDifference',
                    'sequenceIntersection',
                    'group',
                    'undeterminedSubsetOf',
                    'diagonal',
                    'categoryExpression',
                    'genOp',
                )
            ]),

            ''.join( [
                tokenTypeNameToRegExpableChar[ ignoredName]
                for ignoredName in (
                    'whitespace',    ## irrelevant to layer determination
                    'comment',       ## irrelevant to layer determination
                    'instantiator',  ## irrelevant to layer determination
                )
            ]),
        ))
    )

#######################################################
## Definitions for Binary IEML

## Bit position (i.e. the power of 2) assigned to each primitive,
## counted from the least significant bit.
Ebit, Ubit, Abit, Sbit, Bbit, Tbit = range( 6)

## bit position -> primitive character
bitPositionToPrimChar = dict( zip(
    ( Ebit, Ubit, Abit, Sbit, Bbit, Tbit),
    'EUASBT',
))

## primitive character -> bit position (the inverse of the table above)
primCharToBitPosition = dict(
    ( primChar, bitPosition)
    for bitPosition, primChar in bitPositionToPrimChar.items()
)

## primitive character -> value of its bit:
##     E 000001,  U 000010,  A 000100,  S 001000,  B 010000,  T 100000
primCharToBinValue = dict(
    ( primChar, 1 << bitPosition)
    for primChar, bitPosition in primCharToBitPosition.items()
)

## lengths of IEML strings
layerLengthList = []  ## created empty here; nothing in this section fills it
lenL = {}             ## length of a binsc -> its layer number
## A binsc at layer n is 3 ** n characters long (1 at layer 0, 729 at layer 6
## when layerCount is 7), so the length alone identifies the layer.
for powerOf3 in range( layerCount):
    binscLength = 3 ** powerOf3
    lenL[ binscLength] = powerOf3


#######################################################
## Matches any single character that is NOT a valid 6-bit semantic character:
## the character class contains chr( 0) plus the range chr( 64) through
## chr( MAXUNICODEVALUE), which leaves only ordinals 1-63 unmatched.
badSemCharRE = re.compile( '[%s%s-%s]' % (
    chr( 0),               ## not meaningful
    chr( 2**6),            ## highest meaningful char is ((2**6) -1), i.e., 63
    chr( MAXUNICODEVALUE),  ## highest ordinal value of a char on this system
))
class Binsc:
    """
    Mostly it's .binsc : a "bytes" data object.  These are used
    internally, in preference to "bytearray"s, because they can be
    hashed.  A "binsc" is a string of 6-bit "semantic characters";
    each character represents a primitive category.  As a whole, a
    binsc always represents a "straight category" in binary IEML.

    The length of a Binsc depends on its layer: 729 characters if
    layer=6, 1 character if layer=0.  A binsc is a member of the
    set of binscs that appears as the .binscs attribute of
    IemlSolo objects.

    Attributes:
        binsc        the semantic characters, as a "bytes" object
        layerNumber  the layer implied by the length of .binsc (via lenL)
    """

    @staticmethod
    def _initValToStr( initVal):
        """
        Return initVal as a str holding one character per semantic
        character, or None if initVal is of an unsupported type.

        Byte values are mapped one-to-one onto characters with the same
        ordinal (latin-1), so an out-of-range byte survives the
        conversion and is reported by the range check in __init__.
        Calling str() on a bytes or bytearray object must NOT be used
        here: it returns the repr of the object ("b'...'"), not its
        content.
        """
        if isinstance( initVal, Binsc):
            return initVal.binsc.decode( 'latin-1')
        if isinstance( initVal, ( bytes, bytearray)):
            return initVal.decode( 'latin-1')
        if isinstance( initVal, str):
            return initVal
        if isinstance( initVal, int):  ## an int is assumed to be a primitive: one character
            return chr( initVal)
        return None

    def __init__( self, initVal):
        """
        initVal must be one of:
            Binsc object
            bytes object
            bytearray object
            str object
            int object  (in which case the result is always a 1-byte binsc)

        The number of semantic characters must be one of the lengths
        known to lenL, and every character must have an ordinal in the
        range 1-63.  Any violation is reported with errMsg() and ends
        the program with sys.exit( 1) (after entering pdb if
        tokenTweakDebug is true).
        """

        tmpStr = Binsc._initValToStr( initVal)
        if tmpStr is None:
            errMsg( 'initializing value is not one of: Binsc, bytes, bytearray, str, or int.  It\'s a %s : "%s"' % ( type( initVal), initVal))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)
        try:
            self.layerNumber = lenL[ len( tmpStr)]  ## set the layer number
        except KeyError:
            errMsg( 'length of binsc (i.e., %d) is not one of %s.' % (
                    len( tmpStr),
                    sorted( list( lenL.keys())),
                ),
            )
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        badSemCharMO = badSemCharRE.search( tmpStr)  ## fast way to detect bad characters
        if badSemCharMO is not None:
            errMsg( 'ordinal value of byte %d (value=%d) is outside the range 1-63' % (
                badSemCharMO.start(),
                ord( tmpStr[ badSemCharMO.start()]),
            ))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)
        self.binsc = bytes( tmpStr, 'us-ascii')

#######################################################
def iemlTrioToIemlSemeBinscs( iemlTrio):
    """
    Multiply the three semes of an IemlTrio together.

    Every combination of one binsc from the first seme, one from the
    second and one from the third is concatenated, in that order, into
    a single binsc three times as long.  The result is a plain set of
    "bytes" objects (the caller may frozenset-ify it; that is not done
    here).

    Returns None when iemlTrio is None.  If iemlTrio is anything other
    than an IemlTrio object, the problem is reported with errMsg() and
    the program ends with sys.exit( 1).
    """
    if iemlTrio is None:
        return None
    if not isinstance( iemlTrio, IemlTrio):
        errMsg( 'iemlTrio is not an IemlTrio object; it\'s a "%s"' % (
            type( iemlTrio),
        ))
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)
    ## cartesian product of the three semes, each triple joined end to end
    return set(
        bytes( bytearray( firstPart) + bytearray( secondPart) + bytearray( thirdPart))
        for firstPart in iemlTrio.semes[ 0]
        for secondPart in iemlTrio.semes[ 1]
        for thirdPart in iemlTrio.semes[ 2]
    )

#######################################################
class IemlSolo:
    """
    Mostly it's .binscs : a frozenset of binscs, each held as a "bytes"
    object.  Each instance of an IemlSolo represents a complex.

    Attributes:
        binscs       frozenset of "bytes" objects, all of the same length
        layerNumber  the layer implied by that common length (via lenL),
                     or None if .binscs is empty
    """
    #######################################################
    def __init__( self, initVal):
        """
        An IemlSolo can be constructed from an IemlTrio,
        in which case the resulting binscs are 3 times
        as long as the binscs in the IemlTrio.  It can
        also be constructed from a frozenset or set, a Binsc, a bytes,
        a bytearray, a str, an int, or a tuple or list of binscs.

        Members of a frozenset, set, tuple or list must already be
        "bytes" objects of one common, valid length; a single bytes,
        bytearray, str or int value is validated by passing it through
        Binsc.  Any violation is reported with errMsg() and ends the
        program with sys.exit( 1) (after entering pdb if
        tokenTweakDebug is true).

        NOTE: The value of a binary attribute of a token object, and
              in the JSON and XML outputs is always equivalent to an
              IemlSolo, never to an IemlTrio.  If needed, an IemlTrio
              object for a given genOp can be reconstructed from the
              binary attributes of the semes of that genOp.
        """

        if isinstance( initVal, frozenset):
            self.binscs = initVal
        elif isinstance( initVal, ( set, list, tuple)):
            ## plain sets are accepted too: IemlTrio hands them over as semes
            self.binscs = frozenset( initVal)
        elif isinstance( initVal, Binsc):
            self.binscs = frozenset( [ initVal.binsc])
        elif isinstance( initVal, ( bytes, bytearray, str, int)):
            self.binscs = frozenset( [ Binsc( initVal).binsc])
        elif isinstance( initVal, IemlTrio):
            self.binscs = frozenset( iemlTrioToIemlSemeBinscs( initVal))  ## this case is more complicated; must multiply the semes
        else:
            errMsg( 'initializing value is of type %s : "%s"' % ( type( initVal), initVal))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        ## check all the binscs and set self.layerNumber
        self.layerNumber = None
        for binsc in self.binscs:
            if not isinstance( binsc, bytes):
                errMsg( 'binsc is not a "bytes" object; it\'s a %s' % (
                    type( binsc),
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)

            try:
                tryLayer = lenL[ len( binsc)]  ## see if there is a layerNumber that corresponds to the length
            except KeyError:
                errMsg( 'length of binsc (i.e., %d) is not one of %s.' % (
                        len( binsc),
                        sorted( list( lenL.keys())),
                    ),
                )
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)

            if self.layerNumber is None:
                self.layerNumber = tryLayer  ## the first binsc fixes the layer

            elif self.layerNumber != tryLayer:
                errMsg( 'binscs are not at the same layer: %d vs. %d' % (
                    self.layerNumber,
                    tryLayer,
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)

    #######################################################
    def get( self):
        """Return the frozenset of binscs ( self.binscs)."""
        return self.binscs

#######################################################
class IemlTrio:
    """
    An IemlTrio is equivalent to a list (or tuple) of three IemlSolos:
    it holds 3 sets of binscs (where an IemlSolo object holds a single
    set of binscs), each of which represents one seme of the genOp
    represented by the IemlTrio.

    Attributes:
        semes  a tuple of three frozensets of binscs, or None if at
               least one of the semes given to the constructor was None
    """

    #######################################################
    def __init__( self, initVal):  ## initVal MUST have 3 items in it, each of which amounts to a frozenset of Binscs.
        """
        initVal must be a list or tuple of exactly 3 semes.  Each seme
        may be an IemlSolo, a frozenset, a set, a list or a tuple of
        binscs, or None.

        Every seme is normalized to a frozenset of binscs (non-IemlSolo
        semes are validated by passing them through IemlSolo).  If any
        seme is None, no trio can be formed and .semes is set to None.

        Any other input is reported with errMsg() and ends the program
        with sys.exit( 1) (after entering pdb if tokenTweakDebug is
        true).
        """
        if not ( isinstance( initVal, ( list, tuple)) and len( initVal) == 3):
            errMsg( 'initVal is not a list (or tuple) with 3 items in it.  It\'s a %s: "%s"' % (
                type( initVal),
                repr( initVal),
            ))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        newSemeList = []
        for seme in initVal:
            if seme is None:
                ## An object cannot turn itself into None from inside
                ## __init__, so the missing seme is recorded explicitly.
                self.semes = None
                return
            if isinstance( seme, IemlSolo):
                newSemeList.append( seme.binscs)
            elif isinstance( seme, ( frozenset, set, list, tuple)):
                ## frozenset() first, so that every container type is
                ## handed to IemlSolo in a form it is sure to accept
                newSemeList.append( IemlSolo( frozenset( seme)).binscs)
            else:
                errMsg( 'seme %s is not an IemlSolo, IemlTrio, or frozenset; it\'s a %s.' % (
                    seme, type( seme),
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)
        ## store the normalized semes, so that every member can be
        ## iterated as a set of binscs
        self.semes = tuple( newSemeList)
    #######################################################
    def get( self):
        """Return .semes: a tuple of three frozensets of binscs, or None."""
        return self.semes

    

#######################################################
## dtdSourceStringList accumulates, piece by piece, the source text of the DTD.
##
## The first piece (below) is the DTD prologue.  The literal is %-formatted:
## its two '%s' fields receive the file name of the running script and
## STARPARSER_VERSION, and every percent sign that must survive into the DTD
## is therefore written '%%'.
## NOTE(review): the upper-case entity value "M.ALLOPSANDCONTAINERSATALLLAYERS"
## looks like a placeholder to be replaced later -- confirm where the list is used.
dtdSourceStringList = []
dtdSourceStringList.append( '<!-- \n\
This Document Type Definition (DTD) defines the syntax\n\
of Star-XML instances output by starparser.py.  It is\n\
also useful for understanding its JSON outputs. \n\
(Output from %s version %s.)\n\
\n\
Legend for this DTD:\n\
\n\
{.name} Names enclosed in braces are the names of the corresponding\n\
        properties implemented in Starparser as the attributes\n\
        of instances of the "Token" object class.  E.g. {.first}.\n\
-->\n\
\n\
<!-- Common attributes defined in the parameter entity a.common;  :\n\
    first CDATA #IMPLIED   \n\
        Decimal integer.  The position within the whole Star expression of the first\n\
        character of the subexpression represented by this element.  {.first}\n\
\n\
    last CDATA #IMPLIED   \n\
        Decimal integer.  The position within the whole Star expression of the last\n\
        character of the subexpression represented by this element.  {.last}\n\
\n\
    id    ID    #IMPLIED   \n\
        The unique id of the token, preceded by the character \'t\'.  {.id}\n\
\n\
    implicit (True|False) "False"\n\
        If implicit==True, the element was implicit in the original expression\n\
        and was made explicit by Starparser.  {.implicit}\n\
\n\
    tokenDisp CDATA #IMPLIED\n\
        A string that offers a display of the entire Star expression, with carets\n\
        under only those characters starting in position "first" and ending with\n\
        "last", inclusive.    \n\
\n\
    Note: Nothing corresponding to {.parent} is needed in XML because\n\
          the parent-child relationship is represented by the\n\
          containment of children elements within their parent\n\
          elements.\n\
\n\
    Note: Nothing corresponding to {.tokenType} is needed here because\n\
          this information is reflected in the names of element types\n\
          (i.e. in the generic identifiers of the XML elements).\n\
-->\n\
<!ENTITY %% a.common "\n\
    first CDATA #IMPLIED\n\
    last  CDATA #IMPLIED\n\
    id    ID    #IMPLIED\n\
    implicit (True|False) \'False\'\n\
    tokenDisp CDATA #IMPLIED\n\
">\n\
\n\
<!-- "CIW" means "any combination of Comments, Instantiators, and Whitespaces.". -->\n\
<!ENTITY %% m.ciw "( whitespace | comment | instantiator)*">\n\
\n\
<!ELEMENT whitespace ( #PCDATA)>\n\
<!ATTLIST whitespace\n\
    %%a.common;\n\
>\n\
<!ELEMENT comment ( #PCDATA)>\n\
<!ATTLIST comment\n\
    %%a.common;\n\
>\n\
<!ELEMENT instantiator ( #PCDATA | comment)*>\n\
<!ATTLIST instantiator\n\
    %%a.common;\n\
>\n\
\n\
<!ENTITY %% m.allOpsAndContainersAtAllLayers "M.ALLOPSANDCONTAINERSATALLLAYERS">\n\
' % (
    os.path.split( sys.argv[ 0])[ 1],
    STARPARSER_VERSION,
))

## One parameter-entity declaration per layer:
##     m.allOpsAndContainersAtL<n>  with the value  "M.ALLOPSANDCONTAINERSATL<n>"
for layerNumber in range( layerCount):
    dtdSourceStringList.append(
        '<!ENTITY %% m.allOpsAndContainersAtL%d "M.ALLOPSANDCONTAINERSATL%d">\n' % ( ( layerNumber,) * 2)
    )

## Next piece of the DTD: the declaration of the root element type <ieml>, and
## the opening of the declaration of <usl> (whose content model is completed
## by the statements that follow this one).  The literal is %-formatted with
## an empty tuple, which has the sole effect of turning every '%%' into '%'.
dtdSourceStringList.append( '\
\n\
<!-- \n\
root element type: <ieml>\n\
\n\
An IEML expression is either a usl (in which case the <ieml> contains a <usl>),\n\
or it is an expression of a single category or complex, in which case it appears\n\
as a container (group, undeterminedSubsetOf, or categoryExpression), or as a \n\
genOp (genOpAtL...), or as nonGenOp (primitive...AtL...  sequence...AtL...).\n\
\n\
The attributes of <ieml> elements are:\n\
\n\
    prologue CDATA #IMPLIED\n\
    epilogue CDATA #IMPLIED\n\
       These are strings that precede and follow (respectively) the\n\
       Star expression.  The * and ** delimiters, if any, appear in\n\
       these attributes, along with adjacent whitespaces, if any.\n\
\n\
    expression CDATA #IMPLIED\n\
       This is the original Star expression that was parsed in order\n\
       to create this XML document.\n\
-->\n\
<!ELEMENT ieml ( \n\
usl |\n\
%%m.allOpsAndContainersAtAllLayers; )>\n\
<!ATTLIST ieml\n\
    %%a.common;\n\
    prologue CDATA #IMPLIED\n\
    epilogue CDATA #IMPLIED\n\
    expression CDATA #IMPLIED\n\
    parser CDATA #IMPLIED\n\
>\n\
\n\
<!ELEMENT usl (\n\
    %%m.ciw;,\n\
' % ())

## Content model of <usl>: one optional 'complexAtL<n>' per layer, each on
## its own line, with a comma after every item except the last.
for layerNumber in range( layerCount):
    dtdSourceStringList.append( 'complexAtL%d?' % ( layerNumber))
    dtdSourceStringList.extend(
        [ '\n'] if layerNumber == layerCount - 1 else [ ',', '\n']
    )

## Close the <usl> element declaration, declare its attribute list, and add
## the documentation comment plus the parameter entity for the "binary"
## attribute (a.binary).  The '% ()' at the end only turns each '%%' in the
## literal into a single '%'.
dtdSourceStringList.append( '\
)>\n\
<!ATTLIST usl\n\
    %%a.common;\n\
>\n\
<!-- binary attribute\n\
\n\
The binary attribute\'s value is either "None" (if the binary value\n\
could not be calculated) or a list of newline-separated "binary\n\
straight categories", each represented as as a hexadecimal string.  In\n\
each hexadecimal string, two hex characters represent each of the\n\
straight category\'s component 6-bit primitive categories.  The 6 bits\n\
are the least significant (rightmost) bits of the 8-bit byte\n\
represented by the two hex characters, as shown in the following\n\
table:\n\
\n\
hexadecimal        bits          ieml         decimal   STAR\n\
                              primitives                symbol\n\
\n\
    00           00 0000       (invalid)          0  \n\
    01           00 0001       __ ___E            1       E:\n\
    02           00 0010       __ __U_            2       U:\n\
    03           00 0011       __ __UE            3  \n\
    04           00 0100       __ _A__            4       A:\n\
    05           00 0101       __ _A_E            5  \n\
    06           00 0110       __ _AU_            6       O:\n\
    07           00 0111       __ _AUE            7  \n\
    08           00 1000       __ S___            8       S:\n\
    09           00 1001       __ S__E            9  \n\
    0a           00 1010       __ S_U_           10  \n\
    0b           00 1011       __ S_UE           11  \n\
    0c           00 1100       __ SA__           12  \n\
    0d           00 1101       __ SA_E           13  \n\
    0e           00 1110       __ SAU_           14  \n\
    0f           00 1111       __ SAUE           15  \n\
    10           01 0000       _B ____           16       B:\n\
    11           01 0001       _B ___E           17  \n\
    12           01 0010       _B __U_           18  \n\
    13           01 0011       _B __UE           19  \n\
    14           01 0100       _B _A__           20  \n\
    15           01 0101       _B _A_E           21  \n\
    16           01 0110       _B _AU_           22  \n\
    17           01 0111       _B _AUE           23  \n\
    18           01 1000       _B S___           24  \n\
    19           01 1001       _B S__E           25  \n\
    1a           01 1010       _B S_U_           26  \n\
    1b           01 1011       _B S_UE           27  \n\
    1c           01 1100       _B SA__           28  \n\
    1d           01 1101       _B SA_E           29  \n\
    1e           01 1110       _B SAU_           30  \n\
    1f           01 1111       _B SAUE           31  \n\
    20           10 0000       T_ ____           32       T:\n\
    21           10 0001       T_ ___E           33  \n\
    22           10 0010       T_ __U_           34  \n\
    23           10 0011       T_ __UE           35  \n\
    24           10 0100       T_ _A__           36  \n\
    25           10 0101       T_ _A_E           37  \n\
    26           10 0110       T_ _AU_           38  \n\
    27           10 0111       T_ _AUE           39  \n\
    28           10 1000       T_ S___           40  \n\
    29           10 1001       T_ S__E           41  \n\
    2a           10 1010       T_ S_U_           42  \n\
    2b           10 1011       T_ S_UE           43  \n\
    2c           10 1100       T_ SA__           44  \n\
    2d           10 1101       T_ SA_E           45  \n\
    2e           10 1110       T_ SAU_           46  \n\
    2f           10 1111       T_ SAUE           47  \n\
    30           11 0000       TB ____           48  \n\
    31           11 0001       TB ___E           49  \n\
    32           11 0010       TB __U_           50  \n\
    33           11 0011       TB __UE           51  \n\
    34           11 0100       TB _A__           52  \n\
    35           11 0101       TB _A_E           53  \n\
    36           11 0110       TB _AU_           54  \n\
    37           11 0111       TB _AUE           55  \n\
    38           11 1000       TB S___           56       M:\n\
    39           11 1001       TB S__E           57  \n\
    3a           11 1010       TB S_U_           58  \n\
    3b           11 1011       TB S_UE           59  \n\
    3c           11 1100       TB SA__           60  \n\
    3d           11 1101       TB SA_E           61  \n\
    3e           11 1110       TB SAU_           62       F:\n\
    3f           11 1111       TB SAUE           63       I:\n\
-->\n\
<!ENTITY %% a.binary "binary CDATA #IMPLIED">\n\
\n\
' % ())

## Declare one complexAtL<n> element per layer.  Its content is one or more of
## that layer's operations/containers, and it carries the "binary" attribute.
for layerNumber in range( layerCount):
    dtdSourceStringList.append( '\
<!ELEMENT complexAtL%d ( %%m.allOpsAndContainersAtL%d;)+>\n\
<!ATTLIST complexAtL%d\n\
    %%a.binary;\n\
>\n\
' % ( ( layerNumber,) * 3))
## Declare the CIW wrapper elements used inside genOps and start the DTD
## comment that documents the attributes common to genOpAtL... elements.
## The single %d in the literal receives the highest layer number
## (layerCount - 1); each '%%' becomes a literal '%'.
dtdSourceStringList.append( '\
\n\
<!-- pre-genOP CIWs appear in these: -->\n\
<!ELEMENT preGenOpCIW %%m.ciw;>\n\
\n\
<!-- intra-genOp pre-layermark CIWs appear in any combination of the following: -->\n\
<!ELEMENT postSeme1CIW %%m.ciw;>\n\
<!ELEMENT postSeme2CIW %%m.ciw;>\n\
<!ELEMENT postSeme3CIW %%m.ciw;>\n\
\n\
<!-- post-layermark CIWs appear in these: -->\n\
<!ELEMENT postGenOpCIW %%m.ciw; >\n\
\n\
<!-- attributes common to all genOpAtL... elements:\n\
    layerMark CDATA #IMPLIED\n\
        The character used as the terminating layermark.\n\
        Note: <In genOpAtL...> elements, the position of the layer\n\
              mark within the genOp is always the same as the value of\n\
              the "last" attribute.\n\
\n\
    layerNumber CDATA #IMPLIED\n\
        An integer, 0-%d.  The layer number.  The element\'s generic\n\
        identifier also supplies this information.\n\
\n\
' % ( layerCount - 1)
)
## Finish the DTD comment documenting the genOp attributes (roleNumber,
## containedIn, symbol, symFirst, symLast) and declare the a.genOp parameter
## entity that bundles them together with a.binary.
dtdSourceStringList.append( '\
    roleNumber CDATA #IMPLIED\n\
        An integer, 1-3.  The number of the role played by this\n\
        seme in the containing genOp, if any.\n\
\n\
    containedIn CDATA #IMPLIED\n\
        A list of sextuples of whitespace-separated tokens, each\n\
        representing a container that contains this genOp.  Each\n\
        sextuple\'s members (1-6) are:\n\
        1  The container type: "group", "categoryExpression",\n\
           "undeterminedSubsetOf", or "diagonal".\n\
        2. Either a parameter identifier (a string of\n\
           decimal digits) or "None".\n\
        3. (Like "first".) The numeric position within the entire Star\n\
           expression of the start delimiter of the container, or\n\
           "None" if this information is not available.\n\
        4. (Like "last".) The numeric position within the entire Star\n\
           expression of the end delimiter of the container, or\n\
           "None" if this information is not available.\n\
        5. (Like "piFirst".) The numeric position within the entire Star\n\
           expression of the first character of the parameter\n\
           identifier, if any, or "None" if this information is not\n\
           available.\n\
        6. (Like "piLast".) The numeric position within the entire Star\n\
           expression of the last character of the parameter\n\
           identifier, if any, or "None" if this information is not\n\
           available.\n\
        The order of the sextuples is innermost container (whose\n\
        delimiters are closest to the genOp expression) to outermost\n\
        container (delimiters farthest).\n\
\n\
    symbol CDATA #IMPLIED\n\
       Appears only in genOps at L0 and L1, and only when such\n\
       genOps correspond to actual symbols in the IEML expression.  \n\
       At L0, one of: U A S B T I F E M O\n\
       At L1, one of: wo wa wu we y o e u a i j g\n\
                      h c p x s b t k m n d f l \n\
\n\
    symFirst CDATA #IMPLIED\n\
    symLast  CDATA #IMPLIED\n\
       These attributes appear only if there is also a "symbol"\n\
       attribute; their values are the numeric positions within the\n\
       entire Star expression of the first and last characters of the\n\
       symbol.\n\
       Note: A$?comment?$:   (carets reflect first, last)\n\
             ^^^^^^^^^^^^^\n\
             A$?comment?$:   (carets reflect symFirst, symLast)\n\
             ^\n\
\n\
-->\n\
<!ENTITY %% a.genOp "\n\
    layerMark CDATA #IMPLIED\n\
    layerNumber CDATA #IMPLIED\n\
    roleNumber CDATA #IMPLIED\n\
    containedIn CDATA #IMPLIED\n\
    symbol CDATA #IMPLIED\n\
    symFirst CDATA #IMPLIED\n\
    symLast  CDATA #IMPLIED\n\
    %%a.binary;\n\
">\n\
' % ())

## Declare genOpAtL0 (which contains only CIW wrappers and requires a
## primitiveSet attribute) and the m.genOpCIW1to5 entity.  That entity's
## replacement text ends with a comma, so a reference to it can be followed
## directly by the next content particle.
dtdSourceStringList.append( '\
\n\
<!ELEMENT genOpAtL0 ( \n\
    preGenOpCIW?,\n\
    postSeme1CIW?,\n\
    postGenOpCIW?\n\
)>\n\
<!-- primitiveSet: a list of primitive names; one or more of E, U, A, S, B, T. -->\n\
<!ATTLIST genOpAtL0 \n\
    %%a.common;\n\
    %%a.genOp;\n\
    primitiveSet CDATA #REQUIRED\n\
>\n\
\n\
<!ENTITY %% m.genOpCIW1to5 "\n\
    preGenOpCIW?,\n\
    postSeme1CIW?,\n\
    postSeme2CIW?,\n\
    postSeme3CIW?,\n\
    postGenOpCIW?,\n\
">\n\
\n\
' % ())

## Declare genOpAtL<n> for every layer above L0.  After the CIW wrappers, the
## content is one mandatory operand from the layer below (n - 1), optionally
## followed by a second and then a third operand from that same lower layer.
## The five substitutions are: n, n-1, n-1, n-1, n.
for layerNumber in range( 1, layerCount):
    dtdSourceStringList.append( '\
<!ELEMENT genOpAtL%d ( \n\
    %%m.genOpCIW1to5;\n\
    ( \n\
        %%m.allOpsAndContainersAtL%d;\n\
    ),\n\
    ( \n\
        ( \n\
            %%m.allOpsAndContainersAtL%d;\n\
        ),\n\
        ( \n\
            %%m.allOpsAndContainersAtL%d;\n\
        )?\n\
    )?\n\
)>\n\
<!ATTLIST genOpAtL%d \n\
    %%a.common;\n\
    %%a.genOp;\n\
>\n\
' % ( ( layerNumber,) + ( layerNumber - 1,) * 3 + ( layerNumber,)))

## Declare the CIW wrapper elements used inside nonGenOps, the m.nonGenOpCIW
## entity (its replacement text ends with a comma), and start the DTD comment
## documenting the nonGenOp attributes.  The %s receives every key of
## operatorOrderDict, sorted with operatorOrder as the sort key and separated
## by two spaces.
dtdSourceStringList.append( '\
\n\
\n\
<!-- nonGenOp CIW elements -->\n\
<!ELEMENT preNonGenOpCIW %%m.ciw;>\n\
<!ELEMENT preOperatorCIW %%m.ciw;>\n\
<!ELEMENT postOperatorCIW %%m.ciw;>\n\
<!ELEMENT postNonGenOpCIW %%m.ciw; >\n\
\n\
<!ENTITY %% m.nonGenOpCIW "\n\
    preNonGenOpCIW?,\n\
    preOperatorCIW?,\n\
    postOperatorCIW?,\n\
    postNonGenOpCIW?,\n\
">\n\
\n\
<!-- a.nonGenOp : Attributes common to all nonGenOp elements: \n\
\n\
    containedIn CDATA #IMPLIED\n\
        A list of sextuples of whitespace-separated tokens, each\n\
        representing a container that contains this nonGenOp.  Each\n\
        sextuple\'s members (1-6) are:\n\
        1  The container type: "group", "categoryExpression",\n\
           "undeterminedSubsetOf", or "diagonal".\n\
        2. Either a parameter identifier (a string of\n\
           decimal digits) or "None".\n\
        3. (Like "first".) The numeric position within the entire Star\n\
           expression of the start delimiter of the container, or\n\
           "None" if this information is not available.\n\
        4. (Like "last".) The numeric position within the entire Star\n\
           expression of the end delimiter of the container, or\n\
           "None" if this information is not available.\n\
        The order of the sextuples is innermost container (whose\n\
        delimiters are closest to the nonGenOp expression) to\n\
        outermost container (delimiters farthest).\n\
\n\
    operator CDATA #IMPLIED\n\
       The operator symbol used in the expression.  One of:\n\
       %s  \n\
\n\
' % (
    '  '.join( sorted( list( operatorOrderDict.keys()), key=operatorOrder)),
))

## Continue the nonGenOp attribute documentation: opFirst/opLast and
## layerNumber.  The %d receives the highest layer number (layerCount - 1).
dtdSourceStringList.append( '\
    opFirst CDATA #IMPLIED\n\
    opLast  CDATA #IMPLIED\n\
       The numeric position within the entire Star expression of the\n\
       first and last characters of the operator symbol.\n\
\n\
    layerNumber CDATA #IMPLIED\n\
        An integer, 0-%d.  The layer number.  (This is always the same\n\
        as the layerNumber of both operands.)\n\
' % (
    layerCount - 1,
))
## Finish the nonGenOp attribute documentation, declare the a.nonGenOp entity,
## and emit the nonGenOp template delimited by START_OF_NONGENOP_TEMPLATE /
## END_OF_NONGENOP_TEMPLATE.  expandDTDTemplates() later replaces the 'GI' and
## 'LAYER' markers inside that template.  Then declare m.containerCIWs and
## start the DTD comment documenting the container attributes; the %d receives
## the highest layer number (layerCount - 1).
dtdSourceStringList.append( '\
\n\
    roleNumber CDATA #IMPLIED\n\
        An integer, 1-3.  The number of the role played by this\n\
        seme in the containing genOp, if any.\n\
-->\n\
\n\
<!ENTITY %% a.nonGenOp "\n\
    containedIn CDATA #IMPLIED\n\
    operator CDATA #IMPLIED\n\
    opFirst CDATA #IMPLIED\n\
    opLast  CDATA #IMPLIED\n\
    layerNumber CDATA #IMPLIED\n\
    roleNumber CDATA #IMPLIED\n\
    %%a.binary;\n\
">\n\
\n\
START_OF_NONGENOP_TEMPLATE\n\
<!ELEMENT GI (\n\
    %%m.nonGenOpCIW;\n\
    ( %%m.allOpsAndContainersAtLLAYER; ),\n\
    ( %%m.allOpsAndContainersAtLLAYER; )\n\
)>\n\
<!ATTLIST GI\n\
    %%a.common;\n\
    %%a.nonGenOp;\n\
>\n\
END_OF_NONGENOP_TEMPLATE\n\
\n\
<!ENTITY %% m.containerCIWs "\n\
    postParameterIdentifierCIW?\n\
">\n\
<!-- a.containers : Attributes of all container elements:\n\
                    group\n\
                    categoryExpression\n\
                    undeterminedSubsetOf\n\
                    diagonal\n\
    layerNumber CDATA #IMPLIED\n\
        An integer, 0-%d.  The layer number.  (Redundant.  Same as\n\
        the layerNumber of all constructs in the content.)\n\
    parameterIdentifier  CDATA #IMPLIED\n\
        The parameter identifier given for this container.\n\
    piFirst CDATA #IMPLIED\n\
        Decimal integer.  The position within the whole Star expression of the first\n\
        character of the parameter identifier of the container construct {.piFirst}\n\
    piLast  CDATA #IMPLIED\n\
        Decimal integer.  The position within the whole Star expression of the last\n\
        character of the parameter identifier of the container construct {.piLast}\n\
' % (
    layerCount - 1,
))
## Finish the DTD comment documenting the container attributes and declare the
## a.containers parameter entity (a.common, a.binary, layerNumber and the
## parameter-identifier attributes).
## NOTE(review): parameterIdentifier/piFirst/piLast are described here a second
## time; the preceding chunk of the same DTD comment already describes them.
dtdSourceStringList.append( '\
    parameterIdentifier  CDATA #IMPLIED\n\
        The parameter identifier string (all decimal digits, or whatever was between the "" marks), if any.\n\
    piFirst CDATA #IMPLIED\n\
    piLast  CDATA #IMPLIED\n\
       The numeric position within the entire Star expression of the\n\
       first and last characters of the parameter identifier.\n\
-->\n\
\n\
<!ENTITY %% a.containers "\n\
    %%a.common;\n\
    %%a.binary;\n\
    layerNumber CDATA #IMPLIED\n\
    parameterIdentifier  CDATA #IMPLIED\n\
    piFirst CDATA #IMPLIED\n\
    piLast  CDATA #IMPLIED\n\
">\n\
\n\
' % ())

## Declare every container element, <type>AtL<n>, for each container type and
## each layer.  The content is the container CIWs followed by one or more of
## that layer's operations/containers; the attributes are a.containers.
for elementTypeName in ( 'group', 'categoryExpression', 'undeterminedSubsetOf', 'diagonal'):
    for layerNumber in range( layerCount):
        ## opening of the element declaration and its content model
        dtdSourceStringList.append( '\
<!ELEMENT %sAtL%d (\n\
%%m.containerCIWs;,\n\
( %%m.allOpsAndContainersAtL%d;)+\n\
' % ( elementTypeName, layerNumber, layerNumber))
        ## close of the element declaration, then its attribute list
        dtdSourceStringList.append( '\
)>\n\
<!ATTLIST %sAtL%d\n\
    %%a.containers;\n\
>\n\
' % ( elementTypeName, layerNumber))

## Concatenate all the fragments appended above into the single DTD source
## text.  It still contains the template markers and placeholders that
## expandDTDTemplates() processes.
dtdSourceString = ''.join( dtdSourceStringList)  ## DTD is now complete

## temporary; for use while sequence operations are not supported
## (starts out as an empty list)
sequenceOperationMessages = []

#######################################################
def openStream( fileName, rOrW, encodingName=defaultEncoding, **kwargs):
    """
    Return a File Object ("FO") open for writing or reading.

    fileName      '.'  means "no stream": None is returned.
                  '-'  means the standard stream selected by rOrW: sys.stdout
                       when rOrW starts with 'w', sys.stdin when it starts
                       with 'r'; anything else is reported as an internal
                       error and the program exits with status 1.
                  '--' means sys.stderr.
                  Anything else is a file path, made absolute before opening.
    rOrW          the open mode.
    encodingName  codec used for files opened in text mode.
    kwargs        json=True or pickle=True opens the file in binary mode
                  (mode rOrW + 'b') with the builtin open(); otherwise the
                  file is opened with codecs.open() and encodingName.
    """

    if fileName == '.':
        return None
    if fileName == '--':
        return sys.stderr
    if fileName == '-':
        if rOrW.startswith( 'w'):
            return sys.stdout
        if rOrW.startswith( 'r'):
            return sys.stdin
        errMsg( 'internal error')
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    targetPath = os.path.abspath( fileName)
    ## only the literal value True selects binary mode
    wantsBinary = ( kwargs.get( 'json') is True) or ( kwargs.get( 'pickle') is True)
    if wantsBinary:
        return open( targetPath, '%sb' % ( rOrW))
    return codecs.open( targetPath, rOrW, encodingName)

#######################################################
def writeOutput( FO, buf, streamOptionName,):
    """
    Write the string buf to the file object FO, then flush FO.

    FO                 target file object, or None for a disabled stream
                       (nothing is written in that case).
    buf                the text to write.
    streamOptionName   name of the command-line option that selected this
                       stream; used only in the explanatory notice below.

    When FO is sys.stdout or sys.stderr and the write fails with
    UnicodeEncodeError, a notice is sent to starErrorsStreamFO (unless that
    stream is itself disabled, i.e. None) and buf is written again with all
    non-ASCII characters replaced by XML character references.
    """

    global starErrorsStreamFO

    if FO is None:
        return  ## do nothing in this case

    if FO is sys.stdout or FO is sys.stderr:
        try:
            FO.write( buf)
        except UnicodeEncodeError:
            if FO is sys.stdout:
                ioName = 'the standard output stream'
            else:
                ioName = 'the error output stream'
            ## The errors stream may be disabled (None); skip the notice then
            ## instead of crashing in the middle of the fallback.
            if starErrorsStreamFO is not None:
                starErrorsStreamFO.write( '\n\
(Output to the %s stream has been entityRef-ified because it contains\n\
non-ASCII characters and had to flow through %s.\n\
To avoid replacing non-ASCII characters with equivalent XML character\n\
entity references, use a filename as the argument to the %s option\n\
instead of - or --.)\n\n' % (
                    streamOptionName,
                    ioName,
                    streamOptionName,
                ))
            FO.write( buf.encode( 'us-ascii', 'xmlcharrefreplace').decode( 'us-ascii'))
    else:
        FO.write( buf)
    FO.flush()

#######################################################
def readInput( FO):
    """
    Return everything that can be read from the file object FO.

    If decoding fails (UnicodeDecodeError), a message naming inputFilePath and
    inputEncoding is written to the Star errors stream and the program exits
    with status 1 (after dropping into pdb when tokenTweakDebug is set).
    """
    global starErrorsStreamFO

    try:
        return FO.read()
    except UnicodeDecodeError as decodeError:
        complaint = 'Could not decode the data from \'%s\' using the %s decoder.\nUse the -inputEncoding argument to specify a different decoder?\nError message: "%s"\n' % (
            inputFilePath,
            inputEncoding,
            decodeError,
        )
        writeOutput( starErrorsStreamFO, complaint, '-StarErrors')
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

#######################################################
def expandDTDTemplates( dtdSourceString):
    """
    Return dtdSourceString with its templates and placeholders expanded.

    Three expansions are performed, using the module-level layerCount:

    1. The text between START_OF_NONGENOP_TEMPLATE and
       END_OF_NONGENOP_TEMPLATE is replaced by one copy of itself for every
       (nonGenOp type, operation type, layer) combination.  In each copy 'GI'
       becomes the element name (e.g. primitiveUnionAtL0) and 'LAYER' becomes
       the layer number.  The two marker strings themselves are dropped.
    2. 'M.ALLOPSANDCONTAINERSATALLLAYERS' is replaced by an alternation of
       every genOp, nonGenOp and container element name at every layer.
    3. 'M.ALLOPSANDCONTAINERSATL<n>' is replaced, for each layer n, by an
       alternation of that layer's nonGenOp, genOp and container names.

    Raises ValueError when the template markers are not both present.
    """
    nonGenOpTypes = ( 'primitive', 'sequence')
    operationTypes = ( 'Union', 'Difference', 'Intersection', 'Subtraction')
    layerNumbers = list( range( layerCount))

    ## expand NONGENOP_TEMPLATE
    ##                                  -1-  ----------2---------------  -3-  ----------4-------------  5-
    nonGenOpTemplateRE = re.compile( '^(.*?)(START_OF_NONGENOP_TEMPLATE)(.*?)(END_OF_NONGENOP_TEMPLATE)(.*)$', re.DOTALL)
    nonGenOpTemplateMO = nonGenOpTemplateRE.match( dtdSourceString)
    if nonGenOpTemplateMO is None:
        raise ValueError(
            'DTD source lacks the START_OF_NONGENOP_TEMPLATE ... END_OF_NONGENOP_TEMPLATE section'
        )
    templateSourceString = nonGenOpTemplateMO.group( 3)

    expandedParts = [ nonGenOpTemplateMO.group( 1)]
    nonGenOpNames = []  ## ordered by type, then operation, then layer
    for nonGenOpType in nonGenOpTypes:
        for operationType in operationTypes:
            for layerNumber in layerNumbers:
                gi = '%s%sAtL%d' % ( nonGenOpType, operationType, layerNumber)
                nonGenOpNames.append( gi)
                expandedParts.append(
                    templateSourceString.replace( 'GI', gi).replace( 'LAYER', '%d' % ( layerNumber))
                )
    expandedParts.append( nonGenOpTemplateMO.group( 5))
    expanded = ''.join( expandedParts)

    ## expand M.ALLOPSANDCONTAINERSATALLLAYERS
    genOpsStr = '\n%s\n' % ( ' |\n'.join( [ 'genOpAtL%d' % ( n) for n in layerNumbers]))
    nonGenOpsStr = ' | '.join( nonGenOpNames)
    containerStr = ' | '.join( [
        '%sAtL%d' % ( containerType, n)
        for containerType in ( 'group', 'categoryExpression', 'diagonal', 'undeterminedSubsetOf')
        for n in layerNumbers
    ])
    expanded = expanded.replace(
        'M.ALLOPSANDCONTAINERSATALLLAYERS',
        '%s | %s | %s' % ( genOpsStr, nonGenOpsStr, containerStr),
    )

    ## expand M.ALLOPSANDCONTAINERSATL0, etc.
    ## Highest layer first: the placeholder for layer 1 is a prefix of the one
    ## for layer 10, so replacing in ascending order would corrupt the
    ## two-digit placeholders.
    for layerNumber in reversed( layerNumbers):
        opNameList = [
            '%s%sAtL%d' % ( nonGenOpType, operationType, layerNumber)
            for nonGenOpType in nonGenOpTypes
            for operationType in operationTypes
        ]
        for containerOrGenOp in ( 'genOp', 'group', 'categoryExpression', 'undeterminedSubsetOf', 'diagonal'):
            opNameList.append( '%sAtL%d' % ( containerOrGenOp, layerNumber))
        expanded = expanded.replace(
            'M.ALLOPSANDCONTAINERSATL%d' % ( layerNumber),
            ' | '.join( opNameList),
        )

    return expanded

#######################################################

## Run the template/placeholder expansion once, when this module is executed.
dtdStringAfterTemplateExpansion = expandDTDTemplates( dtdSourceString)



## Matches an <!ENTITY ...> declaration whose replacement text is a
## double-quoted literal.  Group 2 is present only for parameter entities (the
## '%' and the whitespace after it); group 3 is the entity name; group 6 is
## the replacement text between the quotes.
##                                                                                                         ----------5--------   --6--  --7--   8
##                            ------------1-------------   -----------2----------   ---------3---------   ---------------------4------------------                
entityDeclRE2 = re.compile( '(<!ENTITY[ \\011\\015\\012]+)(\\%[ \\011\\015\\012]+)?([A-Za-z0-9:\\.\\-_]+)(([ \\011\\015\\012]+")([^"]+)("[^>]*)(>))', re.DOTALL)
#######################################################
def expandParameterEntitiesInDtd( dtdString):
    """
    Return dtdString with every parameter entity reference replaced by its
    replacement text and every entity declaration removed.

    The parameter entities declared in dtdString are collected into the
    module-level dict parameterEntities (name -> replacement text).
    References of the form %name followed by a non-name character are then
    substituted repeatedly until a full pass changes nothing, so that
    references nested inside replacement texts get expanded too.  A ';'
    terminator is consumed; any other terminating character is kept.

    NOTE(review): an entity whose replacement text refers, directly or
    indirectly, to itself while adding other text would make this loop run
    forever.
    """
    global parameterEntities

    def _expandParameterEntitiesInDtd( MO2):
        global parameterEntities
        terminator = MO2.group( 3)
        if terminator == ';':
            terminator = ''
        return '%s%s' % (
            parameterEntities[ MO2.group( 2)],
            terminator,
        )

    parameterEntities = {}
    for MO in re.finditer( entityDeclRE2, dtdString):
        if MO.group( 2) is not None:  ## only parameter entities carry the '%'
            parameterEntities[ MO.group( 3)] = MO.group( 6)

    ## One compiled pattern per entity, built once.  The name is escaped
    ## because entity names contain '.', which would otherwise match any
    ## character and could capture text that is not a declared entity name.
    ## NOTE: In SGML, the last char of an entity reference can be any non-SGMLname character; not limited to ';'.
    referenceREs = [
        re.compile( '(%%)(%s)([^A-Za-z0-9\\.\\-_])' % re.escape( parameterEntityName))
        for parameterEntityName in sorted( parameterEntities)
    ]

    while True:  ## have to keep doing this until things stop expanding
        oldDtdString = dtdString

        for referenceRE in referenceREs:
            dtdString = referenceRE.sub( _expandParameterEntitiesInDtd, dtdString)

        if oldDtdString == dtdString:
            break

    dtdString = re.sub( entityDeclRE2, '', dtdString)

    return dtdString

#######################################################

## Second expansion stage, also run once when this module is executed: inline
## all parameter entity references and drop the entity declarations.
dtdStringAfterParameterEntityExpansion = expandParameterEntitiesInDtd( dtdStringAfterTemplateExpansion)



#######################################################
################ COMMON STUFF BEGINS HERE #############
#######################################################

## When tokenTweakDebug is true, the internal-error paths in this file drop
## into pdb before exiting.
tokenTweakDebug = defaultTokenTweakDebug
debug = False  ## used by message handlers, should normally be False

#######################################################
### development/delivery kludge starts here
#######################################################
## Best-effort lookup of this host's DNS domain name by running the external
## "dnsdomainname" command and decoding its standard output as ASCII.
try:
    subProc = subprocess.Popen(
        [ 'dnsdomainname',],
        stdout = subprocess.PIPE,
        stderr = subprocess.PIPE,
    )
    stdStr, errStr = subProc.communicate()
    dnsdomainname = stdStr.decode( 'us-ascii').strip()
except:  ## NOTE(review): bare except; any failure at all yields an empty name
    dnsdomainname = ''

## Depending on the domain, import show() from the ch_show3 module; on the
## development domains (and when no domain name was found) also set debug.
## On any other domain show() is not imported at all.
if dnsdomainname == 'rakis.net':  ## kludge for ieml.org website
    sys.path.append( '/home/ieml/public_html')
    from ch_show3 import show
elif dnsdomainname in [ 'coolheads.com', 'infoloom.com', '']:
    from ch_show3 import show
    debug = True
#######################################################
### development/delivery kludge ends here
#######################################################


#######################################################
def xml2star_cvsRevToSoftwareRev( cvsRevString):
    """
    Map a CVS revision string to a software revision string.

    The revision number is taken from group 4 of cvsRevStringRE matched
    against cvsRevString.  Revision 87 maps to '1.0', 88 to '1.1', and so on.
    For revision numbers below 87 the result is None.
    """
    revisionNumber = int( cvsRevStringRE.match( cvsRevString).group( 4))
    if revisionNumber < 87:
        return None
    return '1.%s' % ( revisionNumber - 87)
    


###########################
## MESSAGING BEGINS HERE ## 
###########################
#messaging.py
#this is a module used for messaging.  It allows multiple classes
#to handle various types of messages.  It should work on all python
#versions >= 1.5.2
# written by Christian Bird
# downloaded and adapted by SRN from
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/144838 on
# 20040323

import sys, string, traceback, types, os

#this flag determines whether debug output is sent to debug handlers themselves
#debug = True

#######################################################
def sendDebugMessages(debugging):
    """Set the module-level 'debug' flag to the given value."""
    global debug
    debug = debugging
#######################################################
class MessagingException(Exception):
    """an exception class for any errors that may occur in 
    a messaging function"""
    def __init__(self, args=None):
        """
        args may be omitted (no arguments), a tuple/list of exception
        arguments, or a single value such as a message string.

        A single value is stored as the one-element tuple (value,).
        Assigning a string straight to self.args would split it into a tuple
        of single characters, and assigning None raises TypeError.
        """
        if args is None:
            args = ()
        elif not isinstance( args, ( tuple, list)):
            args = ( args,)
        Exception.__init__( self, *args)
#######################################################
class FakeException(Exception):
    """Exception raised and immediately caught for the sole purpose of
    obtaining a reference to the current execution frame."""
#######################################################        
class MessageHandler:
    """Base class for message handlers.

    Each method receives one string whenever the running program calls the
    corresponding messaging function.  The implementations here do nothing;
    subclasses override the ones they care about."""

    def handleStdMsg(self, msg):
        """Handle a standard message from the program."""
        return None

    def handleErrMsg(self, msg):
        """Handle an error message.  The text already includes the class,
        method, and line of the call."""
        return None

    def handleDbgMsg(self, msg):
        """Handle a debug message.  The text already includes the class,
        method, and line of the call."""
        return None
#######################################################
class defaultMessageHandler(MessageHandler):
    """The default message handler.

    Every message is newline-terminated if necessary and written with
    writeOutput().  Standard and debug messages go to the Star errors stream;
    error messages go to both the Star errors stream and the internal errors
    stream."""

    def handleStdMsg(self, msg):
        """Write a standard message to the Star errors stream."""
        line = msg if msg.endswith( '\n') else msg + "\n"
        writeOutput( starErrorsStreamFO, line, '-StarErrors')

    def handleErrMsg(self, msg):
        """Write an error message to the Star errors and internal errors
        streams."""
        if msg.endswith( '\n'):
            writeOutput( starErrorsStreamFO, msg, '-StarErrors')
            writeOutput( internalErrorsStreamFO, msg, '-internalErrors')
            return
        ## unterminated message: the internal errors stream is written first
        line = msg + "\n"
        writeOutput( internalErrorsStreamFO, line, '-internalErrors')
        writeOutput( starErrorsStreamFO, line, '-StarErrors')

    def handleDbgMsg(self, msg):
        """Write a debug message to the Star errors stream."""
        line = msg if msg.endswith( '\n') else msg + "\n"
        writeOutput( starErrorsStreamFO, line, '-StarErrors')

#######################################################
#this keeps track of the handlers
_messageHandlers = []
#######################################################
#call this with the handler to register it for receiving messages
def registerMessageHandler(handler):
    """Register handler to receive std, error and debug messages.

    Inheritance from MessageHandler is not checked, but the handler must
    provide handleStdMsg, handleErrMsg and handleDbgMsg.  A
    MessagingException is raised, and the handler is not registered, if any
    of them is missing.
    """
    for methodName in ["handleStdMsg", "handleErrMsg", "handleDbgMsg"]:
        try:
            getattr(handler, methodName)
        except AttributeError:
            # Only a genuinely missing attribute is reported as a missing
            # method; other failures (e.g. KeyboardInterrupt) propagate.
            raise MessagingException("The class " + handler.__class__.__name__ + " is missing a " + methodName + " method")
    _messageHandlers.append(handler)
#######################################################
def getCallString(level):
    """Return a string describing a calling frame: file name, function (or
    Class::method) name and current line number.

    level 0 describes the immediate caller of getCallString, level 1 that
    caller's caller, and so on.  A negative level describes getCallString
    itself.  If level exceeds the depth of the call stack, the outermost
    frame is described.
    """
    # sys._getframe() replaces the old raise-and-catch trick for obtaining
    # the current execution frame.
    f = sys._getframe()
    #go back as many call-frames as was specified, stopping at the
    #outermost frame rather than walking off the stack
    while level >= 0 and f.f_back is not None:
        f = f.f_back
        level = level-1
    #if there is a self variable in the caller's local namespace then
    #we'll make the assumption that the caller is a class method
    obj = f.f_locals.get("self", None)
    functionName = f.f_code.co_name
    fileName = f.f_code.co_filename
    # "is not None": an instance that happens to be falsy (e.g. defines
    # __len__ and is empty) must still be reported as Class::method
    if obj is not None:
        callStr = fileName+' : '+obj.__class__.__name__+"::"+functionName+"() (line "+str(f.f_lineno)+")"
    else:
        callStr = fileName+':'+functionName+"() (line "+str(f.f_lineno)+")"
    return callStr
    
#######################################################
#send this message to all handlers of std messages
def stdMsg(*args, **kwargs):
    """Send a standard message to every registered handler.

    The positional arguments are converted to text and joined with single
    spaces: bytes are decoded as UTF-8 (undecodable bytes are replaced), str
    values are used as they are, and anything else is rendered with repr().

    Keyword arguments:
        alwaysSupplyNewLine  (default True) when true, a newline is appended
                             if the message does not already end with one.
    """
    alwaysSupplyNewLine = kwargs.get( 'alwaysSupplyNewLine', True)

    textArgs = []
    for arg in args:
        if isinstance( arg, bytes):
            # str() of a bytes object yields "b'...'", so decode instead
            textArgs.append( arg.decode( 'utf-8', 'replace'))
        elif isinstance( arg, str):
            textArgs.append( arg)
        else:
            textArgs.append( repr( arg))
    stdStr = ' '.join( textArgs)
    if alwaysSupplyNewLine and not stdStr.endswith( '\n'):
        stdStr = stdStr + '\n'
    for handler in _messageHandlers:
        handler.handleStdMsg(stdStr)

#######################################################
#send this message to all handlers of error messages
def errMsg(*args):
    global STARPARSER_VERSION

    newArgs = []
    do_traceback = False
    for arg in args:
        if isinstance( arg, bytes):
            if arg.lower() == 'traceback':
                do_traceback = True
            else:
                newArgs.append( arg)
        elif isinstance( arg, str):
            if arg.lower() == 'traceback':
                do_traceback = True
            else:
#                newArgs.append( arg.encode( 'unicode_escape'))
                newArgs.append( arg)
        else:
#            newArgs.append( repr( arg).encode( 'unicode_escape'))
            newArgs.append( repr( arg))
    args = newArgs
    errStr = 'ERROR in %s %s: %s : %s' % (
        os.path.split( sys.argv[ 0])[ 1],
        STARPARSER_VERSION,
        getCallString( 1),
        ' '.join( list( map( str, args)),
    ))
    if not errStr.endswith( '\n'):
        errStr = errStr + '\n'
    if do_traceback:
        errStr = errStr+''.join( traceback.format_list( traceback.extract_stack()))
    if not errStr.endswith( '\n'):
        errStr = errStr + '\n'
    for handler in _messageHandlers:
        handler.handleDbgMsg(errStr)
#######################################################
#send this message to all handlers of debug messages
def dbgMsg(*args):
##     if not debug:
##         return
    newArgs = []
    do_traceback = False
    for arg in args:
        if isinstance( arg, bytes):
            if arg.lower() == 'traceback':
                do_traceback = True
            else:
                newArgs.append( arg)
        elif isinstance( arg, str):
            if arg.lower() == 'traceback':
                do_traceback = True
            else:
#                newArgs.append( arg.encode( 'unicode_escape'))
                newArgs.append( arg)
        else:
#            newArgs.append( repr( arg).encode( 'unicode_escape'))
            newArgs.append( repr( arg))
    args = newArgs
    dbgStr = "DEBUG "+os.path.split( sys.argv[ 0])[ 1]+":"+getCallString(1)+" : "+' '.join(list(map(str, args)))
    if not dbgStr.endswith( '\n'):
        dbgStr = dbgStr + '\n'
    if do_traceback:
        dbgStr = dbgStr+''.join( traceback.format_list( traceback.extract_stack()))
    if not dbgStr.endswith( '\n'):
        dbgStr = dbgStr + '\n'
    for handler in _messageHandlers:
        handler.handleDbgMsg(dbgStr)
#######################################################
# install one defaultMessageHandler instance when this module is executed
registerMessageHandler(defaultMessageHandler())
#########################
## MESSAGING ENDS HERE ##
#########################

#######################################################
def trueOrFalse( arg):
    """
    Convert the string 'true' or 'false' (any letter case) to the
    corresponding bool.  Any other value is reported through
    starparser_usage() with status 1.
    """
    lowered = arg.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    starparser_usage( 'Expected \'True\' or \'False\', but found "%s" instead.' % ( arg), 1)
        
#######################################################
def charRecogStr( s):
    """
    Return a string in which every character of s (a str, or a list of
    single characters) has had its code point raised by
    numberOfNonChrTokenTypes.

    Any other argument type is an internal error: it is reported and the
    process exits with status 1.
    """
    if not isinstance( s, ( list, str)):
        errMsg( 'internal error: s is type %s' % ( type( s)))
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    return ''.join( chr( ord( ch) + numberOfNonChrTokenTypes) for ch in s)

#######################################################
def dictOfTokenTypesFromStartingAndEndingTokenTypeNames( namePairs):
    """
    Return a dict whose keys are the regexpable chars of every token type
    that lies, in tokenTypeNames order, between the starting and ending
    name (both inclusive) of each pair in namePairs.  All values are None.

    namePairs must be a tuple of ( startingName, endingName) tuples.

    Every inconsistency (wrong argument types, an ending name met before
    its starting name, a starting name whose ending name is never reached,
    or an empty result) is treated as an internal error: it is reported
    through errMsg() and the process exits with status 1.
    """
    returnDict = {}

    if not isinstance( namePairs, tuple):
        errMsg( 'internal error: namePairs argument (%s) was not a tuple.' % (
            namePairs,
        ))
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    for pair in namePairs:
        if not isinstance( pair, tuple):
            ## pair is wrapped in a 1-tuple by the trailing comma, so any
            ## non-tuple value is formatted safely.
            errMsg( 'internal error: item in namePairs (%s) was not a tuple.' % (
                pair,
            ))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        startingName = pair[ 0]
        endingName = pair[ 1]

        recording = False
        for tokenTypeName in tokenTypeNames:
            if tokenTypeName == startingName:
                recording = True
            if recording:
                returnDict[ tokenTypeNameToRegExpableChar[ tokenTypeName]] = None
            if tokenTypeName == endingName:
                if not recording:
                    errMsg( 'internal error: encountered ending name before starting name.  startingName = "%s", endingName = "%s"' % ( startingName, endingName))
                    if tokenTweakDebug:
                        import pdb
                        pdb.set_trace()
                    sys.exit( 1)
                recording = False
                break

        ## Still recording means endingName was never reached.
        if recording or len( returnDict) < 1:
            ## The %d slot needs the *length*; passing the key list here
            ## used to raise TypeError instead of reporting the problem.
            errMsg( 'internal error; recording = %s ; len( returnDict) = %d; startingName = "%s", endingName = "%s"' % (
                recording, len( returnDict),
                startingName, endingName,
            ))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

    return returnDict

#######################################################
## Character classes used by listOfEntityRefifiedChars() to decide which
## characters get replaced by entity references.  The names encode the two
## options of that function: the letter before 'P' is the protectNewlines
## setting and the letter before 'X' is the xmlEscapeForAttributeValue
## setting ('t' = True, 'f' = False).
##   - the 'fP' variants leave \x0a (newline) out of the class;
##   - the 'tX' variants add '"' (\x22), '&' (\x26), '<' (\x3c) and '>' (\x3e).
## All four classes cover the other control characters below \x20 and
## everything from \x7f up to chr( MAXUNICODEVALUE).
fPfXRE = re.compile( '[\x00-\x09\x0b-\x1f\x7f-%s]' % ( chr( MAXUNICODEVALUE)))
tPfXRE = re.compile( '[\x00-\x1f\x7f-%s]' % ( chr( MAXUNICODEVALUE)))
fPtXRE = re.compile( '[\x00-\x09\x0b-\x1f\x22\x26\x3c\x3e\x7f-%s]' % ( chr( MAXUNICODEVALUE)))
tPtXRE = re.compile( '[\x00-\x1f\x22\x26\x3c\x3e\x7f-%s]' % ( chr( MAXUNICODEVALUE)))
def listOfEntityRefifiedChars( buf, **kwargs):              ## this is used for tokenDisp displays
    """
    Return a list with one item per character of buf.  Each item is either
    the character itself or, when the character matches the selected
    character class, an XML reference for it: '&lt;', '&gt;' or '&amp;' for
    those three characters, and a decimal character reference
    ( '&#NNN;') for anything else.

    Keyword arguments (both default to False):

    protectNewlines -- must be True for XML output that will be
        pretty-printed (which involves parsing, where the unprotected
        newlines of tokenDisp attributes would become space characters).
        When True, newline is part of the replaced character class.

    xmlEscapeForAttributeValue -- must be True for XML output.  When True,
        '"', '&', '<' and '>' are part of the replaced character class.
    """

    namedReferences = { '<': '&lt;', '>': '&gt;', '&': '&amp;'}

    #######################################################
    def entRefChar( thisChar):
        if thisChar in namedReferences:
            return namedReferences[ thisChar]
        return '&#%d;' % ( ord( thisChar))
    #######################################################

    wantNewlinesProtected = kwargs.get( 'protectNewlines', False)
    wantXmlEscapes = kwargs.get( 'xmlEscapeForAttributeValue', False)

    ## pick the precompiled character class for this option combination
    if wantNewlinesProtected:
        entityRefifiableRE = tPtXRE if wantXmlEscapes else tPfXRE
    else:
        entityRefifiableRE = fPtXRE if wantXmlEscapes else fPfXRE

    return [
        entRefChar( item) if entityRefifiableRE.search( item) else item
        for item in buf
    ]




#######################################################
def showIemlBSet( iemlBSet):
    """
    Return a human-readable display of the binary value of iemlBSet.

    Accepted arguments are an IemlSolo, an IemlTrio, a Binsc, a set or
    frozenset, or None (for which None is returned).  Anything else is an
    internal error: it is reported and the process exits with status 1.
    """

    #######################################################
    def displayMembersOfBinscSet( binscSet, indent):
        ## one "set member:" heading plus display per member, in sorted order
        pieces = []
        for member in sorted( list( binscSet)):
            pieces.append( '\n\n%sset member:' % ( ' '*indent))
            pieces.append( showBinsc( member, indent=(indent + 4)))
        return ''.join( pieces)
    #######################################################

    if isinstance( iemlBSet, IemlSolo):
        return displayMembersOfBinscSet( iemlBSet.binscs, 0)

    if isinstance( iemlBSet, IemlTrio):
        pieces = []
        for semeIndex in range( 3):
            solo = iemlBSet.semes[ semeIndex]
            pieces.append( '\n\nseme %d' % ( semeIndex + 1))
            pieces.append( displayMembersOfBinscSet( solo.binscs, 4))
        return ''.join( pieces)

    if isinstance( iemlBSet, Binsc):
        return showBinsc( iemlBSet.binsc)

    if isinstance( iemlBSet, ( frozenset, set)):
        return showBinsc( iemlBSet)

    if iemlBSet is None:
        return None

    errMsg( 'internal error: iemlBSet is of unexpected type "%s"; "%s"' % (
        type( iemlBSet),
        repr( iemlBSet),
    ))
    if tokenTweakDebug:
        import pdb
        pdb.set_trace()
    sys.exit( 1)

#######################################################
def showBinsc( binsc, encoding='us-ascii', **kwargs):
    """
    Given a string of IEML-B characters, return a string that
    represents them for human eyes.  Sort of like repr() for IEML-B
    strings.  A space appears after each L0 sequence, an extra space
    after each L1, an extra extra space after each L2, a line break
    after each L3 sequence, and an extra line break after each L4
    sequence.

    In terms of the code: every byte is shown as two hex digits; an extra
    space is inserted before every 3rd byte, another before every 9th, a
    line break before every 27th and a second line break before every
    81st.

    binsc    -- bytes, bytearray, or str (a str is encoded with encoding).
    encoding -- encoding used only when binsc is a str.
    indent   -- optional keyword; number of spaces placed at the start of
                each continuation line (default 0).

    A length that is not a key of lenL is reported but still displayed.  A
    byte above 0x3f, or an unsupported argument type, is reported and the
    process exits with status 1.
    """

    indent = kwargs.get( 'indent', 0)

    msgList = []

    if isinstance( binsc, bytes):
        pass
    elif isinstance( binsc, bytearray):
        ## bytes() only accepts an encoding when its source is a str;
        ## passing one with a bytearray raises TypeError.
        binsc = bytes( binsc)
    elif isinstance( binsc, str):
        binsc = bytes( binsc, encoding)
    else:
        errMsg( 'binsc is not a bytes, str, or bytearray; it\'s a %s' % ( type( binsc)))
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    binscLen = len( binsc)
    if binscLen not in lenL:
        errMsg( 'length of binsc (i.e., %d) is not one of %s, so this display is nonsense:' % (
                binscLen,
                sorted( list( lenL.keys())),
            ),
        )
        msgList.append( '%s(This display is nonsense; binsc\'s length is %d:)\n%s' % (
            ' ' * indent,
            binscLen,
            ' ' * indent,
        ))

    if binscLen > 27:
        ## multi-line displays start on a fresh, indented line
        msgList.append( '\n')
        msgList.append( ' ' * indent)

    for ctr, byteValue in enumerate( binsc):
        if byteValue > 0x3f:
            errMsg( 'invalid IEML-B character at position %d' % ( ctr))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        space = ''
        if ctr > 0:
            space = ' '
            if ( ctr % 3) == 0:
                msgList.append( ' ')
            if ( ctr % 9) == 0:
                msgList.append( ' ')
            if ( ctr % 27) == 0:
                msgList.append( '\n')
                msgList.append( ' ' * indent)
                space = ''  ## no separator at the start of a new line
            if ( ctr % 81) == 0:
                msgList.append( '\n')
                msgList.append( ' ' * indent)

        msgList.append( '%s%02x' % (
            space,
            byteValue & 0x3f,
        ))
    return ''.join( msgList)


#######################################################
def showPlaceInString( stringToBeDisplayed, first, last, indent=0, INDENT=2, **kwargs):
    """
    Return a display of stringToBeDisplayed in which selected characters
    are pointed out by a line of '^' carets underneath them.

    The string is passed through listOfEntityRefifiedChars() and shown line
    by line, each line prefixed with indent * INDENT spaces.  A caret line
    follows a text line only when it contains at least one caret.

    first, last -- inclusive character positions to mark (converted with
        int() when they are not ints).  Ignored when badCharMap is given.

    Keyword arguments:

    protectNewlines -- must be True for XML output that will be
        pretty-printed; each '\\n' of the result becomes '&#xa;'.
    xmlEscapeForAttributeValue -- must be True for XML output; passed on
        to listOfEntityRefifiedChars().
    badCharMap -- optional sequence indexed by character position; a
        position is marked when its entry equals True.  Only used when
        there are discontinuous bad chars to be pointed out.

    An empty stringToBeDisplayed yields an empty string.
    """

    protectNewlines = kwargs.get( 'protectNewlines', False)
    xmlEscapeForAttributeValue = kwargs.get( 'xmlEscapeForAttributeValue', False)
    badCharMap = kwargs.get( 'badCharMap', None)

    if not isinstance( first, int):
        first = int( first)

    if not isinstance( last, int):
        last = int( last)

    ERifiedCharList = listOfEntityRefifiedChars(
        stringToBeDisplayed,
        xmlEscapeForAttributeValue = xmlEscapeForAttributeValue,
        protectNewlines = False, ## False because we *count* a newline as *one* character, because that's what it's going to look like
                                 ## after the XML parser parses it.
    )

    margin = ' ' * indent * INDENT
    outputLines = []
    symbolParts = []  ## pieces of the text line being built
    caretParts = []   ## pieces of the caret line underneath it

    for charCtr, thisString in enumerate( ERifiedCharList):
        if thisString == '\n':
            outputLines.append( '%s%s\n' % ( margin, ''.join( symbolParts)))
            caretLine = ''.join( caretParts)
            if len( caretLine.strip()) > 0:
                outputLines.append( '%s%s\n' % ( margin, caretLine))
            symbolParts = []
            caretParts = []
            continue

        symbolParts.append( thisString)
        if badCharMap is None:
            ## use first and last attributes to decide whether to show a caret
            marked = first <= charCtr <= last
        else:
            ## use badCharMap to decide whether to show a caret
            marked = ( badCharMap[ charCtr] == True)
        ## an entity reference is several characters wide, so its marker is too
        caretParts.append( ( '^' if marked else ' ') * len( thisString))

    symbolLine = ''.join( symbolParts)
    if len( symbolLine) > 0:
        outputLines.append( '%s%s\n' % ( margin, symbolLine))
        caretLine = ''.join( caretParts)
        if len( caretLine.strip()) > 0:
            outputLines.append( '%s%s  \n' % (  ## extra spaces at end of caretLine for clarity, because a " will be tacked onto it later.
                margin,
                caretLine,
            ))

    ## outputLines is empty for an empty input; indexing it unguarded
    ## used to raise IndexError.
    if outputLines and outputLines[ -1].endswith( '\n'):
        outputLines[ -1] = outputLines[ -1][ :-len( '\n')]

    result = ''.join( outputLines)
    if protectNewlines:
        return result.replace( '\n', '&#xa;')
    return result


#######################################################
################ COMMON STUFF ENDS HERE ###############
#######################################################




#######################################################
################ STARPARSER BEGINS HERE ###############
#######################################################

## exception whose purpose is to get us out of a deeply nested loop
class TokenTreeChangedException( Exception):
    """
    Control-flow exception used to get out of a deeply nested loop.

    NOTE(review): judging by the name, it is presumably raised when the
    token tree has been modified -- confirm against the raise sites.
    """
    pass

## Matches a single token type character in the range from
## chr( ordinalOfLeastChar) to chr( MAXUNICODEVALUE), i.e. the token types
## that stand for plain characters.
anyChrTokenTypeRE = re.compile( r'[%s-%s]' % (  ## match any char token
    chr( ordinalOfLeastChar),
    chr( MAXUNICODEVALUE),
))

## Auditing.  These dicts keep track of what still needs parsing and what
## doesn't.  Token.addSelfToTokensDict() fills tokensDict and
## Token.removeSelfFromTokensDict() moves a token from tokensDict to
## outOfScopeTokensDict.
tokensDict = {

    'id': {},         ## keys are token id values (ints), values are tokens

    'tokenType': {},  ## keys are tokenType regexpable chars; values are dicts
                      ## whose keys are token id values, and whose values are tokens.
}
outOfScopeTokensDict = {  ## Like tokensDict, but these tokens have moved out of scope.
    'id': {},             ## Unlike tokensDict, this dict has no purpose other than
    'tokenType': {},      ## debugging
}

## token serial number variable; Token.__init__ increments it before use,
## so the first token gets id 1.
currentTokenId = 0  

#######################################################
class Token:
    """
    One node of the token tree.

    Attributes set by the constructor:

    id            -- serial number taken from the module-level
                     currentTokenId counter.
    tokenType     -- the single regexpable character that identifies the
                     token's type.
    zubTokenLists -- list of subtoken lists; the last list is the current
                     one and earlier lists are kept as history.
    first, last   -- positions given by the caller, or inherited from the
                     first and last subtoken.

    Every new token registers itself in the module-level tokensDict and is
    set as the 'parent' attribute of each of its subtokens.
    """

    #######################################################
    def __init__( self, tokenType, zubTokens, **kwargs):
        """
        tokenType -- either a single regexpable token type character or a
            token type name (looked up in tokenTypeNameToRegExpableChar).
        zubTokens -- list of subtokens.
        kwargs    -- 'first' and 'last' must be given together or not at
            all; any other keyword becomes an attribute of the token.

        An unrecognized token type, or 'first' without 'last' (or vice
        versa), is an internal error: it is reported and the process exits
        with status 1.
        """
        global currentTokenId

        currentTokenId += 1  ## which makes the first one 1
        self.id = currentTokenId

        if len( tokenType) == 1:
            if (
                ( tokenType not in regExpableCharToTokenTypeName)
                and
                ( not anyChrTokenTypeRE.match( tokenType))
            ):
                errMsg( 'internal error; unrecognized regexapable token type "%s"' % (
                    repr( tokenType),
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)
            self.tokenType = tokenType
        else:
            try:
                self.tokenType = tokenTypeNameToRegExpableChar[ tokenType]
            except KeyError:
                errMsg( 'internal error; unrecognized token type name "%s"' % (
                    tokenType,
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)

        self.addSelfToTokensDict()

        self.zubTokenLists = [ zubTokens]

        self.first = None
        self.last = None

        for kwargKey in kwargs:
            if kwargKey == 'first':
                self.first = kwargs[ kwargKey]
                if not 'last' in kwargs:
                    errMsg( 'last keyword argument not specified', 'traceback')
                    if tokenTweakDebug:
                        import pdb
                        pdb.set_trace()
                    sys.exit( 1)
            elif kwargKey == 'last':
                self.last = kwargs[ kwargKey]
                if not 'first' in kwargs:
                    errMsg( 'first keyword argument not specified', 'traceback')
                    if tokenTweakDebug:
                        import pdb
                        pdb.set_trace()
                    sys.exit( 1)
            else:
                setattr( self, kwargKey, kwargs[ kwargKey])

        if self.first is None:
            ## no explicit extent: span from the first subtoken to the last
            self.first = self.zubTokenLists[ -1][ 0].first
            self.last = self.zubTokenLists[ -1][ -1].last

        setAttributeOfTokensInList( self.zubTokenLists[ 0], 'parent', self)

    #######################################################
    def addSelfToTokensDict( self):
        """Register this token in tokensDict under both its id and its token type."""
        global tokensDict

        if not self.tokenType in tokensDict[ 'tokenType']:
            tokensDict[ 'tokenType'][ self.tokenType] = {}
        tokensDict[ 'id'][ self.id] = tokensDict[ 'tokenType'][ self.tokenType][ self.id] = self

    #######################################################
    def removeSelfFromTokensDict( self):
        """
        Move this token from tokensDict to outOfScopeTokensDict.

        Raises KeyError if the token is not currently in tokensDict.
        """
        global tokensDict, outOfScopeTokensDict

        if self.tokenType not in outOfScopeTokensDict[ 'tokenType']:
            outOfScopeTokensDict[ 'tokenType'][ self.tokenType] = {}
        outOfScopeTokensDict[ 'id'][ self.id] = outOfScopeTokensDict[ 'tokenType'][ self.tokenType][ self.id] = self

        del tokensDict[ 'tokenType'][ self.tokenType][ self.id]
        del tokensDict[ 'id'][ self.id]

    #######################################################
    def tokenTypeName( self):
        """Return the name of this token's type."""
        return tokenTypeCharToTokenTypeName( self.tokenType)

    #######################################################
    def subTRxStr( self):  ## regex-able string of subtoken tokentypes
        """Return the regexpable string of the token types of the current subtokens."""
        return rxTTStr( self.zubTokenLists[ -1])

    #######################################################
    def copy( self, **kwargs):
        """
        Return a new Token with the same current subtokens, extent and
        extra (non-method) attributes as this one.

        Keyword arguments:

        removeSelfFromTokensDict -- if True, the copy is taken out of
            tokensDict again right after it is created (default False).
        newTokenTypeName -- token type name for the copy (default: this
            token's own type name).

        Any other keyword is an internal error: it is reported and the
        process exits with status 1.  Note that constructing the copy sets
        the 'parent' attribute of the shared subtokens to the copy.
        """
        removeSelfFromTokensDict = False
        newTokenTypeName = self.tokenTypeName()
        for kwarg in kwargs:
            if kwarg == 'removeSelfFromTokensDict':
                removeSelfFromTokensDict = kwargs[ kwarg]
            elif kwarg == 'newTokenTypeName':
                newTokenTypeName = kwargs[ kwarg]
            else:
                errMsg( 'internal error: unrecognized kwarg "%s"' % ( kwarg))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)

        thisCopy = Token(
            newTokenTypeName,
            self.zubTokenLists[ -1],
            first = self.first,
            last = self.last,
        )
        if removeSelfFromTokensDict:
            thisCopy.removeSelfFromTokensDict() ## the copy is NOT put into the tokensDict database
        for attributeName in dir( self):
            if attributeName.startswith( '__'): continue
            ## these were already set by the constructor call above
            if attributeName in [ 'id', 'first', 'last', 'tokenType', 'zubTokenLists', ]: continue
            if isinstance( getattr( self, attributeName), types.MethodType): continue
            setattr( thisCopy, attributeName, getattr( self, attributeName))
        return thisCopy

    #######################################################
    def findAncestorOfType( self, ancTypeNameStartsWith):
        """
        Walk up the 'parent' chain and return the first ancestor whose
        token type name starts with ancTypeNameStartsWith, or which has a
        'containedIn' member whose type name does.  Return None when the
        chain ends (no 'parent' attribute, or a parent of None) without a
        match.
        """
        childToken = self
        while True:
            if childToken is None:
                return None
            if not hasattr( childToken, 'parent'):
                return None
            parentToken = childToken.parent
            if parentToken is None:
                ## end of the chain; calling tokenTypeName() on None used
                ## to raise AttributeError here.
                return None
            if parentToken.tokenTypeName().startswith( ancTypeNameStartsWith):
                return parentToken
            if hasattr( parentToken, 'containedIn'):
                for containedInToken in parentToken.containedIn:
                    if containedInToken.tokenTypeName().startswith( ancTypeNameStartsWith):
                        return parentToken
            childToken = parentToken

    #######################################################
    def findRoot( self):
        """
        Follow the 'parent' chain upwards and return the topmost token,
        i.e. the first one that has no 'parent' attribute or whose parent
        is None.  A token without a parent is its own root.
        """
        childToken = self
        while True:
            parentToken = getattr( childToken, 'parent', None)
            if parentToken is None:
                return childToken
            ## step upwards; without this the loop never terminated
            childToken = parentToken

    #######################################################
    def removeSelfFromParentSubtokens( self):
        """
        Append to the parent's zubTokenLists a new subtoken list that is
        the current one minus this token (matched by identity).  Does
        nothing when there is no parent.  Not finding this token among the
        parent's current subtokens is an internal error: it is reported
        and the process exits with status 1.
        """
        if not hasattr( self, 'parent'): return
        if self.parent is None: return
        found = False
        newZubTokenList = []
        for siblingToken in self.parent.zubTokenLists[ -1]:
            if siblingToken is self:
                found = True
            else:
                newZubTokenList.append( siblingToken)
        if not found:
            errMsg( 'internal error: could not find self among parent\'s subtokens.')
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)
        self.parent.zubTokenLists.append( newZubTokenList)
        return

#######################################################
def rxTTStr( tokenList):
    """
    Concatenate the tokenType of every token in tokenList, in order, into
    one regexpable string.
    """
    return ''.join( [ eachToken.tokenType for eachToken in tokenList])
    

#######################################################
def tokenTypeCharToTokenTypeName( tokenTypeChar):
    """
    Return the token type name for one token type character.

    A character matched by anyChrTokenTypeRE yields 'char_' followed by
    the hexBackslashify()-ed original character (the code point lowered by
    numberOfNonChrTokenTypes).  A key of regExpableCharToTokenTypeName
    yields its mapped name.  Any other character below 128 is returned
    unchanged.  Everything else is an internal error: it is reported and
    the process exits with status 1.
    """
    if anyChrTokenTypeRE.match( tokenTypeChar):
        plainChar = chr( ord( tokenTypeChar) - numberOfNonChrTokenTypes)
        return 'char_%s' % ( hexBackslashify( plainChar))

    if tokenTypeChar in regExpableCharToTokenTypeName:
        return regExpableCharToTokenTypeName[ tokenTypeChar]

    if ord( tokenTypeChar) < 128:
        ## this is not actually a token type character
        return '%s' % ( tokenTypeChar)

    errMsg( 'internal error; tokenTypeChar == "%s"' % ( tokenTypeChar))
    if tokenTweakDebug:
        import pdb
        pdb.set_trace()
    sys.exit( 1)
        
#######################################################
def tokenTypeStringToTokenTypeNameList( tokenTypeString):
    """
    Return the list of token type names that corresponds, character by
    character, to tokenTypeString.
    """
    return [ tokenTypeCharToTokenTypeName( eachChar) for eachChar in tokenTypeString]



#######################################################
## Code points of the boundary characters of the digit ranges.
ord0 = ord( '0')
ord7 = ord( '7')
ord9 = ord( '9')
orda = ord( 'a')
ordf = ord( 'f')
ordA = ord( 'A')
ordF = ord( 'F')
def interpretBackslashes( stringToInterpret):
    """
    Return stringToInterpret with its backslash escapes replaced by the
    characters they stand for.

    Recognized escapes:

      \\xH, \\xHH       up to two hex digits
      \\uH ... \\uHHHH  up to four hex digits
      \\O, \\OO, \\OOO  up to three octal digits
      \\n \\f \\r \\t   newline, form feed, carriage return, tab
      \\\\              a single backslash

    A backslash followed by a literal form feed, carriage return or tab
    character yields that character.  '\\x' or '\\u' followed by no hex
    digit at all yields chr( 0).

    Any other character after a backslash is an internal error: it is
    reported and the process exits with status 1.  A lone backslash at the
    very end of the string raises IndexError.
    """

    hexDigits = '0123456789abcdefABCDEF'
    octalDigits = '01234567'

    ## escapes that map one character to one character
    simpleEscapes = {
        'n': '\012',
        '\\': '\\',
        'f': '\f',
        'r': '\r',
        't': '\t',
        ## a backslash in front of the literal control character is
        ## accepted as well
        '\f': '\f',
        '\r': '\r',
        '\t': '\t',
    }

    #######################################################
    def takeDigits( position, limit, allowedDigits):
        ## Collect at most limit consecutive characters of allowedDigits
        ## starting at position; return them with the next unread position.
        digits = []
        while (
            len( digits) < limit
            and position < len( stringToInterpret)
            and stringToInterpret[ position] in allowedDigits
        ):
            digits.append( stringToInterpret[ position])
            position += 1
        return ''.join( digits), position
    #######################################################

    #######################################################
    def digitsToCharacter( digits, base):
        ## digits holds only valid digits of base; no digits at all means 0
        if not digits:
            return chr( 0)
        return chr( int( digits, base))
    #######################################################

    if '\\' not in stringToInterpret: return stringToInterpret

    newSL = []
    position = 0
    while position < len( stringToInterpret):
        c = stringToInterpret[ position]
        position += 1
        if c != '\\':
            newSL.append( c)
            continue

        c = stringToInterpret[ position]  ## the character being escaped
        position += 1
        if c == 'x':
            digits, position = takeDigits( position, 2, hexDigits)
            newSL.append( digitsToCharacter( digits, 16))
        elif c == 'u':
            digits, position = takeDigits( position, 4, hexDigits)
            newSL.append( digitsToCharacter( digits, 16))
        elif c in octalDigits:
            ## c is the first octal digit; up to two more may follow
            digits, position = takeDigits( position, 2, octalDigits)
            newSL.append( digitsToCharacter( c + digits, 8))
        elif c in simpleEscapes:
            newSL.append( simpleEscapes[ c])
        else:
            dbgMsg('Case of backslash error in "%s"' % stringToInterpret)

            errMsg( 'internal error: bad character after backslash: "%s".\ns == "%s".\n' % (
                repr( c),
                stringToInterpret,
            ))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

    return ''.join( newSL)

#######################################################
def tokenTypeNameListToRegexpableString( ttnList):
    """
    Return the regexpable string for a list of token type names.

    Backslash escapes in each name are interpreted first.  A name starting
    with 'char_' contributes the character that follows that prefix, with
    its code point raised by numberOfNonChrTokenTypes; any other name is
    looked up in tokenTypeNameToRegExpableChar.
    """
    pieces = []
    for rawName in ttnList:
        name = interpretBackslashes( rawName)
        if name.startswith( 'char_'):
            pieces.append( chr( ord( name[ 5]) + numberOfNonChrTokenTypes))
        else:
            pieces.append( '%s' % ( tokenTypeNameToRegExpableChar[ name],))
    return ''.join( pieces)

#######################################################
def setAttributeOfTokensInList( tokenList, attributeName, attributeValue, **kwargs):
    """
    Set attributeName to attributeValue on every token of tokenList and
    return tokenList itself.

    The only accepted keyword is removeSelfFromTokensDict (default False);
    when true, removeSelfFromTokensDict() is also called on each token.
    Any other keyword is an internal error: it is reported and the process
    exits with status 1.
    """

    alsoRemove = False
    for key, value in kwargs.items():
        if key != 'removeSelfFromTokensDict':
            errMsg( 'internal error')
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)
        alsoRemove = value

    for eachToken in tokenList:
        setattr( eachToken, attributeName, attributeValue)
        if alsoRemove:
            eachToken.removeSelfFromTokensDict()
    return tokenList


###############################
## DEBUGGING FUNCTIONS BELOW ##
###############################
def printPyAttsDict():
    """
    This function is for debugging ONLY.

    For every token in tokensDict and outOfScopeTokensDict whose type name
    does not start with 'char_', collect its non-dunder, non-method
    attributes into a dict keyed by token type name and then by attribute
    name (values are lists of ( token id, attribute value) pairs), and
    hand that dict to show().
    """
    pyAttsDict = {}

    for tokensById in ( tokensDict[ 'id'], outOfScopeTokensDict[ 'id']):
        for token in tokensById.values():
            typeName = tokenTypeCharToTokenTypeName( token.tokenType)
            if typeName.startswith( 'char_'):
                continue
            attsOfThisType = pyAttsDict.setdefault( typeName, {})
            for attName in dir( token):
                if attName.startswith( '__'):
                    continue
                attValue = getattr( token, attName)
                if isinstance( attValue, types.MethodType):
                    continue
                attsOfThisType.setdefault( attName, []).append( ( token.id, attValue))

    show( pyAttsDict, maxRecur = 2)

#######################################################
def printPyAttsDict2():
    """
    This function is for debugging ONLY.

    Print, for every non-dunder, non-method attribute name found on any
    token in tokensDict or outOfScopeTokensDict, one line listing the
    sorted repr()s of the value types seen for that attribute.
    """
    valuesByAttName = {}

    for tokensById in ( tokensDict[ 'id'], outOfScopeTokensDict[ 'id']):
        for token in tokensById.values():
            for attName in dir( token):
                if attName.startswith( '__'):
                    continue
                attValue = getattr( token, attName)
                if isinstance( attValue, types.MethodType):
                    continue
                valuesByAttName.setdefault( attName, []).append( attValue)

    for attName in sorted( valuesByAttName):
        typeReprs = { repr( type( value)) for value in valuesByAttName[ attName]}
        print( "%s  valueTypes: %s" % (
            attName,
            ' '.join( sorted( typeReprs))
        ))



#######################################################
def printTokenList( thisTokenList, **kwargs):
    """
    This function is for debugging ONLY.

    Call printToken() on every token of thisTokenList, forwarding the
    keyword arguments unchanged.

    The keyword arguments are passed on directly rather than being pasted
    into a source string and exec()-ed, so values of any type (strings,
    objects) can be passed, not only those whose str() happens to be a
    valid Python expression.
    """

    for thisToken in thisTokenList:
        printToken( thisToken, **kwargs)

    return

#######################################################
def printToken( thisToken, **kwargs):
    """
    This function is for debugging ONLY.

    Write the showToken() display of thisToken to the internal-errors
    stream.  The keyword arguments are forwarded to showToken() directly
    rather than through an exec()-ed source string, so values of any type
    can be passed.
    """

    ## '%s' formatting keeps the old stringification of whatever
    ## showToken() returns.
    pmsg = '%s' % ( showToken( thisToken, **kwargs),)

    writeOutput( internalErrorsStreamFO, pmsg, '-internalErrors')
    return

#######################################################
def printTokensDict( tokensDict):
    """
    This function is for debugging ONLY.

    Write the showTokensDict() display of tokensDict to the
    internal-errors stream.
    """
    rendered = showTokensDict( tokensDict)
    writeOutput( internalErrorsStreamFO, rendered, '-internalErrors')

#######################################################
def showTokensDict( tokensDict):
    """
    This function is for debugging ONLY.

    Return a display of tokensDict: a banner, then for each token type
    name (in sorted order) that has at least one token, a heading followed
    by the showToken() display of each of its tokens in ascending id order.
    """
    tokensByType = tokensDict[ 'tokenType']
    typeNames = sorted( [ tokenTypeCharToTokenTypeName( typeChar) for typeChar in tokensByType])

    pieces = [ '\n%s\ntokensDict\n%s\n' % ( '='*60, '-'*60)]
    for typeName in typeNames:
        ## map the name back to the regexpable char used as the dict key
        tokensOfThisType = tokensByType[ tokenTypeNameListToRegexpableString( [ typeName])]
        tokenIds = sorted( tokensOfThisType.keys())
        if not tokenIds:
            continue
        pieces.append( '\n%s :\n' % ( typeName,))
        for tokenId in tokenIds:
            pieces.append( '%s' % (
                showToken(
                    tokensOfThisType[ tokenId],
                    indent = 2,
                    maxRecur = 0,
                ),
            ))
    return ''.join( pieces)

#######################################################
def showTokenList( tokenList, outputHandle = None, **kwargs):
    """
    This function is for debugging ONLY.

    Renders every token in tokenList with showToken() (kwargs are passed
    through unchanged) and concatenates the renderings in order, each token
    appearing exactly once.

    If outputHandle is given, the text is written to it and None is
    returned; otherwise the text is returned.
    """
    msg = ''.join( showToken( thisToken, **kwargs) for thisToken in tokenList)
    if outputHandle is not None:
        outputHandle.write( msg)
        return None
    return msg

#######################################################
def showMO( MO, pattern=None, outputHandle=None):
    """
    Build a human-readable description of a regexp match object, with the
    token-type characters translated by tokenTypeStringToTokenTypeNameList().

    MO            the match object, or None when there was no match.
    pattern       only used when MO is None, to show what failed to match.
    outputHandle  if given (and MO is not None), the description is also
                  written to it.

    The description is always returned.
    """

    if MO is None:
        patternDisplay = groupifyREPatternDisplay( tokenTypeStringToTokenTypeNameList( pattern))
        return '(None -- no match) pattern:\n%s' % ( patternDisplay,)

    ## the subject string, abbreviated to its first 65 characters when long
    subject = MO.string
    if len( subject) >= 70:
        stringLine = ' .string: %s... (len=%d)\n' % (
            tokenTypeStringToTokenTypeNameList( subject[ :65]),
            len( subject),
        )
    else:
        stringLine = ' .string: %s\n' % (
            tokenTypeStringToTokenTypeNameList( subject),
        )

    pieces = [
        stringLine,
        ' .re.pattern:\n%s' % ( groupifyREPatternDisplay( tokenTypeStringToTokenTypeNameList( MO.re.pattern)),),
    ]

    ## walk the groups until the match object reports there are no more
    groupNumber = 0
    while True:
        try:
            groupDisplay = tokenTypeStringToTokenTypeNameList( MO.group( groupNumber))
        except IndexError:
            break
        except TypeError:
            ## the group's value could not be translated; show it raw
            groupDisplay = repr( MO.group( groupNumber))
        pieces.append( ' .group( %d): %s\n' % ( groupNumber, groupDisplay))
        groupNumber += 1

    pieces.append( ' .start(): %s\n' % ( repr( MO.start()),))
    pieces.append( ' .end(): %s\n' % ( repr( MO.end()),))

    report = ''.join( pieces)
    if outputHandle is not None:
        outputHandle.write( report)
    return report

#######################################################
def groupifyREPatternDisplay( patternTokens):
    """
    Join patternTokens with single spaces and split the result into numbered
    display lines, starting a new line at every '(' met at nesting depth 0.

    Any text before the first '(' forms the first line.  Text that follows a
    top-level group stays on that group's line until the next top-level '('.
    Returns the lines concatenated, each formatted as '      NN:  text' plus
    a newline; returns '' for an empty pattern.
    """
    chunks = []
    nesting = 0
    for ch in ' '.join( patternTokens):
        if ch == '(':
            if nesting == 0:
                chunks.append( [])   ## a top-level group opens a new line
            nesting += 1
        elif not chunks:
            chunks.append( [])       ## leading text before any group
        elif ch == ')':
            nesting -= 1
        chunks[ -1].append( ch)

    return ''.join(
        '      %2d:  %s\n' % ( lineNumber, ''.join( chunk))
        for lineNumber, chunk in enumerate( chunks, 1)
    )

###############################
## DEBUGGING FUNCTIONS ABOVE ##
###############################

#######################################################
def normalizeRecordEnds( buf):
    """
    Return buf with every record-end converted to the Unix convention, which
    is what is used internally: DOS pairs first, then any remaining lone
    carriage returns (old Mac convention).
    """
    withoutDosEnds = buf.replace( '\015\012', '\012')   ## DOS to Unix
    return withoutDosEnds.replace( '\015', '\012')      ## Mac to Unix


## backslashify everything below ' ' (space), space itself, backslash itself, and everything above 0x7f.
## The backslash is written as an escaped backslash *for the regexp engine*; with a lone backslash
## character in the pattern, the engine treats it as escaping the character that follows it and
## the backslash itself is never matched.
hexBackslashable2RE = re.compile( '[\x00-\x20\\\\\x7f-\uffff]')
#######################################################
def hexBackslashify( buf):
    """
    Return buf with every character matched by hexBackslashable2RE replaced
    by a backslash-escaped notation:

      - a backslash becomes two backslashes;
      - any other matched character whose code is 255 or less becomes
        backslash, 'x' and two hex digits (a tab becomes \\x09, a space
        becomes \\x20);
      - matched characters above 255 become backslash, 'u' and four hex
        digits.

    Characters not matched by the pattern are copied unchanged.
    """
    #######################################################
    def hexBackslashifyCharacter( MO):
        char = MO.group( 0)
        if char == '\\':
            return '\\\\'
        code = ord( char)
        if code <= 255:
            return '\\x%02x' % ( code)
        return '\\u%04x' % ( code)

    return hexBackslashable2RE.sub( hexBackslashifyCharacter, buf)


#######################################################
def showToken( thisToken, **kwargs):

    """
    Return a multi-line, human-readable description of thisToken.

    Recognized keyword arguments (any other keyword is reported with
    errMsg() and the program exits):

      indent            nesting level of the display; each level indents by
                        INDENT (3) spaces.  Default 0.
      maxRecur          how many levels of subtokens to display in full after
                        the token itself.  Default 0 (no recursion).
      recurSelectively  if True, recursion only follows the complexAtL*,
                        operands and semes attributes (plus the remaining
                        subtokens in zubTokenLists[ -1]).  Default False.
      recurMsg          a caption displayed above the token; used by the
                        recursion to say which attribute the token came from.
      recursive         True on nested calls.  When False (the default) the
                        global registry of ids currently being displayed
                        (idsInRecursionAlready) is reset.

    Following are all the python attributes of Token objects, in the order in which they should be displayed.

    In the text below:

    * means the value is a token or a list of tokens and it should be displayed as such, with recursion
    X means the value is a token or a list of tokens and it should be displayed as such, but WITHOUT recursion
      means the value is not a token or a list of tokens.
    + means shown in a one-line characterization

     +    tokenType  valueTypes: <class 'str'>
        X parent  valueTypes: <class '__main__.Token'>
        X zubTokenLists  valueTypes: <class 'list'>

          expression

        * complexAtL0  valueTypes: <class 'list'>
        * complexBinAtL0  valueTypes: <class 'list'>
        * complexAtL1  valueTypes: <class 'list'>
        * complexBinAtL1  valueTypes: <class 'list'>
        * complexAtL2  valueTypes: <class 'list'>
        * complexBinAtL2  valueTypes: <class 'list'>
        * complexAtL3  valueTypes: <class 'list'>
        * complexBinAtL3  valueTypes: <class 'list'>
        * complexAtL4  valueTypes: <class 'list'>
        * complexBinAtL4  valueTypes: <class 'list'>
        * complexAtL5  valueTypes: <class 'list'>
        * complexBinAtL5  valueTypes: <class 'list'>
        * complexAtL6  valueTypes: <class 'list'>
        * complexBinAtL6  valueTypes: <class 'list'>

     +  * containedIn  valueTypes: <class 'list'>

     +    primitiveSet  valueTypes: <class 'frozenset'>
     +    symbol  valueTypes: <class 'str'>
        * symbolTokens  valueTypes: <class 'list'>
     +    layerNumber  valueTypes: <class 'int'>
     +    roleNumber  valueTypes: <class 'int'>

        * leadingCIWTokens  valueTypes: <class 'list'>
          startDelimiterText  valueTypes: <class 'str'>
        * startDelimiterTokens  valueTypes: <class 'list'>
        * preGenOpCIW  valueTypes: <class 'list'>
        * postSeme1CIW
        * postSeme2CIW
        * postSeme3CIW
        * postGenOpCIW  valueTypes: <class 'list'>

        * preNonGenOpCIW
        * preOperatorCIW
        * postOperatorCIW
        * postNonGenOpCIW

     +    operator  valueTypes: <class 'str'>
        * operatorTokens  valueTypes: <class 'list'>
     +  * operands  valueTypes: <class 'list'>

        * semes  valueTypes: <class 'list'>

          layerMark  valueTypes: <class 'str'>
        * layerMarkTokens  valueTypes: <class '__main__.Token'>

          text  valueTypes: <class 'str'>
          textWithDelimitedComments  valueTypes: <class 'str'>

     +    parameterIdentifier  valueTypes: <class 'str'>
        * parameterIdentifierTokens  valueTypes: <class 'list'>
        * postParameterIdentifierCIW  valueTypes: <class 'list'>
          endDelimiterText  valueTypes: <class 'str'>
        * endDelimiterTokens  valueTypes: <class 'list'>
        * trailingCIWTokens  valueTypes: <class 'list'>

          binary  valueTypes: <class 'IemlSolo'> or <class 'IemlTrio'>  ??????

        * prologueTokens  valueTypes: <class 'list'>
          prologueText  valueTypes: <class 'str'>
        * epilogueTokens  valueTypes: <class 'list'>
          epilogueText  valueTypes: <class 'str'>
          first  valueTypes: <class 'int'>
          last  valueTypes: <class 'int'>
     +    id  valueTypes: <class 'int'>
          implicit  valueTypes: <class 'bool'>
        X originalTokens  valueTypes: <class 'list'>
    """

    global idsInRecursionAlready, iemlExpressionString

    try:
        iemlExpressionString
    except NameError:
        ## not set yet: take the expression from the root of this token's tree
        rootToken = thisToken.findRoot()
        iemlExpressionString = rootToken.expression

    INDENT = 3

    options = {
        'recurSelectively': False,
        'maxRecur': 0,  ## by default
        'indent': 0,
        'recurMsg': '',
        'recursive': False,
    }

    #######################################################
    def internalError( text):
        ## report an internal error, optionally drop into the debugger, and exit
        errMsg( text)
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    for kwarg in kwargs:
        if kwarg not in options:
            internalError( 'internal error: Unrecognized keyword arg "%s"' % ( kwarg))
        options[ kwarg] = kwargs[ kwarg]

    recurSelectively = options[ 'recurSelectively']
    maxRecur = options[ 'maxRecur']
    indent = options[ 'indent']
    recurMsg = options[ 'recurMsg']

    if not options[ 'recursive']:
        idsInRecursionAlready = {}

    pad = ' ' * indent * INDENT                   ## left margin of this level
    captionPad = ' ' * (( indent * INDENT) + 5)   ## left margin of captions
    rule = '-' * 60

    #######################################################
    def attrLine( label, value):
        ## one "label : value" display line at the current indent
        return '%s%25.25s : %s\n' % ( pad, label, value)

    #######################################################
    def displayableTokenType( tokenType):
        ## a single-character type above 0x7f is shown as a \uXXXX escape
        if len( tokenType) == 1 and ord( tokenType) > 0x7f:
            return '\\u%04x' % ( ord( tokenType))
        return tokenType

    #######################################################
    def resolveTokenList( attributeNameOrRawList):
        ## Return the tokens designated by the argument, as a list: either the
        ## list itself, or the value of the named attribute of thisToken
        ## (wrapped in a list if it is a single object; empty if absent).
        if isinstance( attributeNameOrRawList, list):
            return attributeNameOrRawList
        if isinstance( attributeNameOrRawList, str):
            if not hasattr( thisToken, attributeNameOrRawList):
                return []
            tokenList = getattr( thisToken, attributeNameOrRawList)
            if not isinstance( tokenList, list):
                tokenList = [ tokenList]
            return tokenList
        internalError( 'internal error: type(attributeNameOrRawList) == "%s"' % ( type( attributeNameOrRawList)))

    #######################################################
    def oneLineTokenCharacterization( token):
        parts = [ 'typ=%s' % ( token.tokenTypeName())]

        def note( source, template, attributeName, convert = None):
            ## append template % value if source has the attribute
            try:
                value = getattr( source, attributeName)
                if convert is not None:
                    value = convert( value)
                parts.append( template % ( value,))
            except AttributeError:
                pass

        def noteMembers( label, attributeName, withParameterIdentifier = False):
            ## append "label=[ type-id ... ]" if the token has the attribute
            if not hasattr( token, attributeName):
                return
            parts.append( '%s=[' % ( label))
            for member in getattr( token, attributeName):
                parts.append( '%s-%d' % ( member.tokenTypeName(), member.id))
                if withParameterIdentifier:
                    note( member, 'pi=%s', 'parameterIdentifier')
                    note( member, 'piFirst=%s', 'piFirst')
                    note( member, 'piLast=%s', 'piLast')
            parts.append( ']')

        noteMembers( 'cntnrs', 'containedIn', withParameterIdentifier = True)

        note( token, 'prim=%s', 'primitiveSet', lambda primitives: sorted( primitives, key=primitiveOrder))
        note( token, 'sym=%s', 'symbol')
        note( token, 'symFirst=%s', 'symFirst')
        note( token, 'symLast=%s', 'symLast')
        note( token, 'L%d', 'layerNumber')
        note( token, 'R%d', 'roleNumber')

        note( token, 'op=%s', 'operator')
        note( token, 'opFirst=%s', 'opFirst')
        note( token, 'opLast=%s', 'opLast')
        noteMembers( 'opnds', 'operands')
        noteMembers( 'semes', 'semes')

        note( token, 'lmark=%s', 'layerMark', hexBackslashify)

        note( token, 'txt=%s', 'text', hexBackslashify)
        note( token, 'txtwc=%s', 'textWithDelimitedComments')

        note( token, 'pi=%s', 'parameterIdentifier')
        note( token, 'piFirst=%s', 'piFirst')
        note( token, 'piLast=%s', 'piLast')

        parts.append( 'id=%d' % ( token.id))
        if hasattr( token, 'implicit') and token.implicit == True:
            parts.append( 'implicit')
        return ' '.join( parts)

    #######################################################
    def showSubtokenSummary( attributeNameOrRawList, displayedAttributeName = None):
        ## Return one characterization line per designated token (see
        ## resolveTokenList); only the first line carries the attribute name.
        ## Returns '' when there are no such tokens.
        tokenList = resolveTokenList( attributeNameOrRawList)
        if len( tokenList) == 0:
            return ''

        if displayedAttributeName is None:
            if isinstance( attributeNameOrRawList, list):
                internalError( 'internal error.  Since attributeName is a list, a value must be provided for the displayedAttributeName argument')
            displayedAttributeName = attributeNameOrRawList

        lines = []
        label = displayedAttributeName
        for subToken in tokenList:
            lines.append( attrLine( label, oneLineTokenCharacterization( subToken)))
            label = ''
        return ''.join( lines)

    #######################################################
    def callShowTokenRecursively( recurOnAttributeNameOrRawTokenList, recurOnAttributeName):
        ## Return the full display (one level deeper) of every designated
        ## token.  A token whose id is already being displayed further up the
        ## recursion is only characterized in one line, to avoid looping.
        global idsInRecursionAlready

        pieces = []
        otherTokenCtr = 0
        for otherToken in resolveTokenList( recurOnAttributeNameOrRawTokenList):
            otherTokenCtr += 1
            caption = 'item %d in the %s attribute of %s (id=%d)' % (
                otherTokenCtr,
                recurOnAttributeName,
                thisToken.tokenTypeName(),
                thisToken.id,
            )

            if otherToken.id in idsInRecursionAlready:
                pieces.append( '%s%s' % ( pad, rule))
                pieces.append( '\n%s%s' % ( captionPad, caption))
                pieces.append( '\n%s%s' % ( pad, rule))
                pieces.append( '\n%s(not looping on) %s\n' % (
                    pad,
                    oneLineTokenCharacterization( otherToken),
                ))
                continue

            idsInRecursionAlready[ otherToken.id] = None

            ## same options as this call, one level deeper and with one less
            ## level of recursion left
            childKwargs = dict( kwargs)
            childKwargs[ 'indent'] = indent + 1
            childKwargs[ 'maxRecur'] = maxRecur - 1
            childKwargs[ 'recurMsg'] = caption
            childKwargs[ 'recursive'] = True   ## nested calls must not reset the id registry
            pieces.append( showToken( otherToken, **childKwargs))

            idsInRecursionAlready.pop( otherToken.id, None)

        return ''.join( pieces)

    out = []

    #######################################################
    def attribute( attributeName):
        ## display the named attribute of thisToken, if it has it
        if hasattr( thisToken, attributeName):
            out.append( attrLine( attributeName, getattr( thisToken, attributeName)))

    #######################################################
    def subtokens( attributeNameOrRawList, displayedAttributeName = None):
        out.append( showSubtokenSummary( attributeNameOrRawList, displayedAttributeName))

    complexAttributeNames = [ 'complexAtL%d' % ( layer) for layer in range( 7)]

    ## heading
    out.append( '%s%s\n' % ( pad, rule))
    if len( recurMsg) > 0:
        out.append( '%s%s\n' % ( captionPad, recurMsg))
        out.append( '%s%s\n' % ( pad, rule))

    if hasattr( thisToken, 'first'):
        showPlaceString = showPlaceInString(
            iemlExpressionString,
            thisToken.first,
            thisToken.last,
            indent, INDENT,
            protectNewlines = False,
            xmlEscapeForAttributeValue = False,
        )
    else:
        showPlaceString = '---(no -first- attribute)---'
    out.append( '%s\n' % ( showPlaceString,))

    ## tokenType
    try:
        out.append( '%s%25.25s : %s (%s) id=%d\n' % (
            pad,
            'type',
            thisToken.tokenTypeName(),
            displayableTokenType( thisToken.tokenType),
            thisToken.id,
        ))
    except AttributeError:
        out.append( '%s%25.25s : %s (%s) id=%s\n' % (
            pad,
            'type',
            '(none)',
            '(none)',
            '(none)',
        ))

    ## expression
    attribute( 'expression')

    ## parent
    displayParentTokenType = None
    displayParentTokenTypeName = None
    parentToken = getattr( thisToken, 'parent', None)
    if parentToken is not None and hasattr( parentToken, 'tokenType'):
        displayParentTokenType = displayableTokenType( parentToken.tokenType)
        displayParentTokenTypeName = parentToken.tokenTypeName()
    out.append( '%s%25.25s : %s (%s)\n' % (
        pad,
        'parent type',
        displayParentTokenTypeName,
        displayParentTokenType,
    ))

    ## "(remaining subtokens)"
    if hasattr( thisToken, 'zubTokenLists'):
        subtokens( thisToken.zubTokenLists[ -1], '(remaining subtokens)')

    ## complexes (if tokenTypeName() == 'usl')
    for complexAttributeName in complexAttributeNames:
        subtokens( complexAttributeName)

    ## containedIn
    subtokens( 'containedIn')

    ## primitiveSet
    if hasattr( thisToken, 'primitiveSet'):
        out.append( attrLine( 'primitiveSet', sorted( thisToken.primitiveSet, key=primitiveOrder)))

    ## symbol and symbolTokens
    attribute( 'symbol')
    if hasattr( thisToken, 'symFirst'):
        attribute( 'symFirst')
        attribute( 'symLast')
    subtokens( 'symbolTokens')

    ## layerNumber, roleNumber
    attribute( 'layerNumber')
    attribute( 'roleNumber')

    ## leading CIWs, startDelimiters
    subtokens( 'leadingCIWTokens')
    attribute( 'startDelimiterText')
    subtokens( 'startDelimiterTokens')

    for ciwAttributeName in (
        'preGenOpCIW',
        'postSeme1CIW',
        'postSeme2CIW',
        'postSeme3CIW',
        'postGenOpCIW',
        'preNonGenOpCIW',
        'preOperatorCIW',
        'postOperatorCIW',
        'postNonGenOpCIW',
    ):
        subtokens( ciwAttributeName)

    ## operator
    if hasattr( thisToken, 'operator'):
        attribute( 'operator')
        attribute( 'opFirst')
        attribute( 'opLast')
    subtokens( 'operatorTokens')

    ## operands, semes
    subtokens( 'operands')
    subtokens( 'semes')

    ## layermark
    attribute( 'layerMark')
    subtokens( 'layerMarkTokens')

    ## text and textWithDelimitedComments
    attribute( 'text')
    attribute( 'textWithDelimitedComments')

    ## parameterIdentifier
    if hasattr( thisToken, 'parameterIdentifier'):
        attribute( 'parameterIdentifier')
        attribute( 'piFirst')
        attribute( 'piLast')
    subtokens( 'parameterIdentifierTokens')

    ## trailing cruft
    subtokens( 'postParameterIdentifierCIW')
    attribute( 'endDelimiterText')
    subtokens( 'endDelimiterTokens')
    subtokens( 'trailingCIWTokens')

    ## prologue/epilogue
    attribute( 'prologueText')
    subtokens( 'prologueTokens')
    attribute( 'epilogueText')
    subtokens( 'epilogueTokens')

    ## leading/trailing categorySeparators
    if hasattr( thisToken, 'leadingCatSep'):
        attribute( 'leadingCatSepText')
    subtokens( 'leadingCatSep')
    if hasattr( thisToken, 'trailingCatSep'):
        attribute( 'trailingCatSepText')
    subtokens( 'trailingCatSep')

    ## binary
    if hasattr( thisToken, 'binary'):
        out.append( attrLine( 'binary', showIemlBSet( thisToken.binary)))

    ## etc: first, last, id, implicit, and originalTokens
    attribute( 'first')
    attribute( 'last')
    attribute( 'id')
    attribute( 'implicit')
    subtokens( 'originalTokens')

    if maxRecur < 1:
        return ''.join( out)

    ## full display of the subtokens, one level deeper
    if hasattr( thisToken, 'zubTokenLists'):
        out.append( callShowTokenRecursively( thisToken.zubTokenLists[ -1], 'zubTokenLists[ -1]'))

    if recurSelectively:
        recurOnList = complexAttributeNames + [
            'operands',
            'semes',
        ]
    else:
        recurOnList = complexAttributeNames + [
            'containedIn',
            'symbolTokens',
            'leadingCIWTokens',
            'startDelimiterTokens',
            'preGenOpCIW',
            'postSeme1CIW',
            'postSeme2CIW',
            'postSeme3CIW',
            'postGenOpCIW',
            'preNonGenOpCIW',
            'preOperatorCIW',
            'postOperatorCIW',
            'postNonGenOpCIW',
            'operatorTokens',
            'operands',
            'semes',
            'layerMarkTokens',
            'parameterIdentifierTokens',
            'postParameterIdentifierCIW',
            'endDelimiterTokens',
            'trailingCIWTokens',
            'prologueTokens',
            'epilogueTokens',
        ]

    for recurOnAttributeName in recurOnList:
        out.append( callShowTokenRecursively( recurOnAttributeName, recurOnAttributeName))

    return ''.join( out)

#######################################################
def makeListOfCharTokensFromString( exprString):
    """
    Return a list holding one Token per character of exprString, in order.

    Every char has its own tokenType, which is itself + numberOfNonChrTokenTypes.
    Each token is created with no subtokens, and with first and last both
    set to the character's position in exprString (counting from 0).
    """
    return [
        Token(
            chr( ord( c) + numberOfNonChrTokenTypes),
            [], ## no subtokens
            first = charPosition,
            last  = charPosition,
        )
        for charPosition, c in enumerate( exprString)
    ]


#######################################################
def starparser_usage( errorMsg, exitStatus):
    """Write the usage (help) text to the Star-errors stream and exit.

    errorMsg   -- if truthy, it is reported as '\\nError: <errorMsg>\\n' both
                  before and after the usage text.
    exitStatus -- the value handed to sys.exit() once everything is written.

    All output goes through writeOutput() to starErrorsStreamFO with the
    '-StarErrors' label.  The default values shown in the help text are read
    from the module-level default* settings.  This function never returns;
    it always ends by calling sys.exit( exitStatus).
    """
    errorReport = None
    if errorMsg:
        errorReport = '\nError: %s\n' % ( errorMsg)
        writeOutput( starErrorsStreamFO, errorReport, '-StarErrors')

    ## The name this program was invoked under, without its directory part.
    programName = os.path.split( sys.argv[ 0])[ 1]

    usageTemplate = """
Usage: %s [options] [<IEML expression>]

Input options:
--------------

          -i  <input file containing a Star expression>
                Default: If no -i option is used,
                the IEML expression to be parsed must be 
                provided as an invocation argument.
                Use '-' for stdin.

-inputEncoding  <encoding of input>
                Default: %s  (See '-encodings', below.)

Processing options:
-------------------

-collapseContainers  <'True' or 'False'>  Default: '%s'.  'True' to 
                       "collapse" each syntactically unnecessary container
                       construct, like a section of a telescope, into the
                       "containedIn" attribute of the construct that it
                       contains.  Four container constructs are affected:

                       group (...)
                       categoryExpression
                       undeterminedSubsetOf <...>
                       diagonal {...}

                       For example, if -collapseContainers is True and the Star
                       expression is "(A:)", the parser does not report the
                       containing group as a node in the reported hierarchy,
                       and instead it makes the same report that would have
                       been made if the Star expression had been "A:", except
                       that the reported genOpAtL0's "containedIn" attribute
                       lists the group that was "collapsed out of" (i.e.,
                       removed from) the hierarchy.
		       
 -addImplicitSemes   <'True' or 'False'>  Default: '%s'.  If 'True', report
                       implicit semes as part of the ordinary
                       hierarchy, as if they had been explicit.  For example,
                       "A:." is reported as if the Star expression had been
                       "A:E:E:.".  The added implicit semes are reported
                       with "implicit" attributes.  This option is forced to
                       be True if -binary is True, so if you really don't
                       want the implicit semes to be added, you must say
                       -binary False as well as -addImplicitSemes False.

      -validateXml   <'True' or 'False'>  Default: '%s'.  If 'True', check
                       to see whether the XML output conforms to the DTD,
                       and report errors on the -xmlErrors stream (see below).


Output options:
---------------

-outputEncoding  <encoding used for most outputs>
                Default: %s   (See '-encodings', below.)  The specified
                encoding will be used for all outputs except stdout
                and stderr, and except for JSON and pickle (Python) outputs.

 -tokenDisp  <'True' or 'False'>
                Default: '%s'  'True' if you want the XML output
                to include "tokenDisp" attributes, which are bulky
                but helpful in understanding IEML expressions.  The
                value of each "tokenDisp" attribute is the entire
                Star expression, plus carets under the characters
                in the Star expression that correspond to the
                XML element.

 -firstLast  <'True' or 'False'>  
               Default: '%s'  Use 'True' if you want the XML output to
               include "first" and "last" attributes.  Useful for IEML
               editing applications, these give the ordinal positions
               of the first and last characters of the STAR
               subexpression that corresponds to the XML element.
	       In addition to 'first' and 'last', this option also
               controls the output of 'piFirst', 'piLast', 'opFirst',
               'opLast', 'symFirst', and 'symLast' attributes.
                
-includeDTD  <'True' or 'False'>  
                Default: %s  Use 'True' to include the DTD in the XML
                output.

 -prettyXml  <'True' or 'False'>  
                Default: %s  'True' to format the XML output for 
                human inspection.

-showHelperCommand  <'True' or 'False'>  
                Default: %s  'True' to output the command used to
                invoke the helper program, starprettyvalid.py, to
                the -internalErrors stream (see below).

      -binary <'True' or 'False'>
                Default: %s  'True' if you want the outputs (XML, JSON)
                to include 'binary' attributes.  These provide the
                binary value of each IEML construct, or None if the
                binary value could not be calculated.  'True' forces
                -addImplicitSemes to be 'True', too.


Output streams:
---------------

Note: Special <filename>s are available for outputs:
     Use  '-' for stdout (standard output)
     Use '--' for stderr (standard error output)
     Use  '.' to suppress default output.

-dtdWithPEs  <filename> Default: '%s'  Output a human-friendly
               representation of the XML DTD, in which
               XML "parameter entities" are used instead of 
               explicitly repeating long strings in element 
               content models and attribute lists, and exit.

       -dtd  <filename> Default: '%s'  Output a machine-friendly
               representation of the XML DTD, in which no
               XML"parameter entities" are invoked, and everything
               is (repetitiously) explicit, and exit.

-dtdonlyforhtml <filename> Default: '%s'  Output the to the 
               specified stream the XML DTD in a form suitable 
               for inclusion in an HTML document.

-StarErrors  <filename>  Default: '%s'  Reports about syntax
               errors in Star expressions, and about other kinds
               of errors in the usage of Starparser.

-xmlErrors  <filename>  Default: '%s'  Reports about syntax 
               errors in the XML outputs of the parser.  These are
               only useful for parser maintenance.

-internalErrors  <filename>  Default: '%s'  Reports about 
               defects in the parser itself.  These are only useful
               for parser maintenance.

      -json  <filename>
               Default: '%s'  JSON output.  If you're working in
               JavaScript or Ajax, this is probably what you want.

    -pickle  <filename>
               Default: '%s'  Python "pickled" output of the final
               token tree.  If you're working in
               Python, this is probably what you want.  You will
               also need to copy the "Token" class definition from
               the source code of %s.

       -xml  <filename>
               Default: '%s'  XML output.  This is the normal
               output of Starparser.

-xmlAsParsed  <filename>
               Default: '%s'  The version of the XML that was used
               for validation by the XML parser against its DTD.
               If there are XML validation errors, the error reports
               contain line numbers that are accurate with respect
               to this file.  Useful for maintenance of this program.

    -tokens  <filename>
               Default: '%s'  Useful for learning about Star and
               Starparser, and for parser maintenance.  Shows all
               the "important" tokens, but does not show delimiter
               tokens, whitespace tokens, etc.  (A "token" is a
               unit of parsing/interpretation.)
    
 -allTokens  <filename>
               Default: '%s'  Useful for learning about Star and
               Starparser, and for parser maintenance.  Shows ALL
               the tokens that are still in scope at the end of the
               parsing process.
    
Other options:
--------------

           -h  Show this help information and exit.

       -about  Show the license notice, etc., and exit

-releaseNotes  Show the release notes and exit.

   -encodings  Show a list of available encodings and exit.

     -version  Show the version number of this program and exit.

       -debug  <'True' or 'False'>
               Default: %s.  'True' to operate the parser in 
               debug mode.  In this mode, the pdb Python debugger is
               invoked both before and after each token-tweaking
               operation, after displaying the parsing situation.

"""

    ## One value per %s placeholder in usageTemplate, in order of appearance.
    usageValues = (
        programName,
        defaultInputEncoding,
        defaultCollapseContainers,
        defaultAddImplicitSemes,
        defaultValidateXml,
        defaultOutputEncoding,
        defaultTokenDisp,
        defaultFirstLast,
        defaultIncludeDtd,
        defaultPrettyXml,
        defaultShowHelperCommand,
        defaultBinary,
        defaultDtdWithPEsStream,
        defaultDtdStream,
        defaultDtdOnlyForHtmlStream,
        defaultStarErrorsStream,
        defaultXmlErrorsStream,
        defaultInternalErrorsStream,
        defaultJsonStream,
        defaultPickleStream,
        programName,
        defaultXmlStream,
        defaultXmlAsParsedStream,
        defaultTokensStream,
        defaultAllTokensStream,
        defaultTokenTweakDebug,
    )

    writeOutput( starErrorsStreamFO, usageTemplate % usageValues, '-StarErrors')

    ## Repeat the error after the (long) usage text so that it appears at
    ## both ends of the output.
    if errorReport is not None:
        writeOutput( starErrorsStreamFO, errorReport, '-StarErrors')

    sys.exit( exitStatus)



#######################################################
def starparser():
    """Run the parser as a command-line program.

    Steps, in order:
      1. Reset the module-level settings to their default* values.
      2. Read the options (matched case-insensitively) and at most one IEML
         expression from sys.argv.
      3. Check the input and output encoding names against
         encodingsLookupDict, replacing an alias by its canonical name.
      4. Open the Star-errors stream, then honour the "show and exit"
         requests (-about, -releaseNotes, -version, -h, -encodings) and
         report accumulated argument errors.
      5. Open the remaining output streams; if a DTD stream was opened,
         write the DTD there and exit.
      6. Obtain the Star expression (from the -i file or the command line),
         wrap it in the top-level Token, and call tokenTweak().
      7. Write the token displays, JSON, pickle and XML outputs.

    Results are written to the opened streams and to module-level globals;
    nothing is returned.  Several paths end in sys.exit().
    """
    global argCounter, tokenTweakDebug
    global isUSL, iemlExpressionString, iemlExpressionToken, inputFilePath
    global inputEncoding, outputEncoding
    global collapseContainers, addImplicitSemes, validateXml
    global tokenDisp, firstLast, includeDtd, prettyXml, showHelperCommand, binary
    global dtdWithPEsStream, dtdWithPEsStreamFO
    global dtdOnlyForHtmlStream, dtdOnlyForHtmlStreamFO
    global dtdStream, dtdStreamFO
    global starErrorsStream, starErrorsStreamFO
    global xmlErrorsStream, xmlErrorsStreamFO
    global internalErrorsStream, internalErrorsStreamFO
    global jsonStream, jsonStreamFO
    global pickleStream, pickleStreamFO
    global xmlStream, xmlStreamFO
    global xmlAsParsedStream, xmlAsParsedStreamFO
    global tokensStream, tokensStreamFO
    global allTokensStream, allTokensStreamFO

    takeCensusOfEncodings()  ## creates encodingsLookupDict and encodingsAliasesDict

    isUSL = False
    iemlExpressionString = None

    ## Everything below may be overridden from the command line; start from
    ## the defaults.  THIS IS NOT THE PLACE TO CHANGE THE DEFAULTS.
    inputFilePath = None
    inputEncoding = defaultInputEncoding
    collapseContainers = defaultCollapseContainers
    addImplicitSemes = defaultAddImplicitSemes
    validateXml = defaultValidateXml
    outputEncoding = defaultOutputEncoding
    tokenDisp = defaultTokenDisp
    firstLast = defaultFirstLast
    includeDtd = defaultIncludeDtd
    prettyXml = defaultPrettyXml
    showHelperCommand = defaultShowHelperCommand
    binary = defaultBinary
    dtdWithPEsStream = defaultDtdWithPEsStream
    dtdOnlyForHtmlStream = defaultDtdOnlyForHtmlStream
    dtdStream = defaultDtdStream
    starErrorsStream = defaultStarErrorsStream
    xmlErrorsStream = defaultXmlErrorsStream
    internalErrorsStream = defaultInternalErrorsStream
    jsonStream = defaultJsonStream
    pickleStream = defaultPickleStream
    xmlStream = defaultXmlStream
    xmlAsParsedStream = defaultXmlAsParsedStream
    tokensStream = defaultTokensStream
    allTokensStream = defaultAllTokensStream

    showAbout = showReleaseNotes = showEncodings = showVersion = showUsage = False
    argErrorMsgList = []

    ## Options whose following argument is passed through trueOrFalse(),
    ## mapped to the name of the module-level global that receives the result.
    booleanOptionTargets = {
        '-collapsecontainers': 'collapseContainers',
        '-addimplicitsemes': 'addImplicitSemes',
        '-validatexml': 'validateXml',
        '-tokendisp': 'tokenDisp',
        '-firstlast': 'firstLast',
        '-includedtd': 'includeDtd',
        '-prettyxml': 'prettyXml',
        '-showhelpercommand': 'showHelperCommand',
        '-binary': 'binary',
        '-debug': 'tokenTweakDebug',
    }

    ## Options whose following argument is stored exactly as given, mapped to
    ## the name of the module-level global that receives it.
    verbatimOptionTargets = {
        '-i': 'inputFilePath',
        '-inputencoding': 'inputEncoding',
        '-outputencoding': 'outputEncoding',
        '-dtdwithpes': 'dtdWithPEsStream',
        '-dtd': 'dtdStream',
        '-starerrors': 'starErrorsStream',
        '-internalerrors': 'internalErrorsStream',
        '-xmlerrors': 'xmlErrorsStream',
        '-json': 'jsonStream',
        '-pickle': 'pickleStream',
        '-xml': 'xmlStream',
        '-xmlasparsed': 'xmlAsParsedStream',
        '-tokens': 'tokensStream',
        '-alltokens': 'allTokensStream',
        '-dtdonlyforhtml': 'dtdOnlyForHtmlStream',
    }

    #######################################################
    def getNextArg():
        ## Advance to, and return, the argument that follows the current one;
        ## report a usage error (which exits) if there is none.
        global argCounter
        argCounter += 1
        if argCounter >= len( sys.argv):
            starparser_usage(
                'Missing argument after "%s"' % ( sys.argv[ argCounter - 1],),
                1,
            )
        return sys.argv[ argCounter]
    #######################################################

    moduleGlobals = globals()  ## same namespace that the global statements above refer to
    argCounter = 1
    while argCounter < len( sys.argv):
        arg = sys.argv[ argCounter]
        option = arg.lower()

        ## Exact option names are tested before the prefix matches, so that,
        ## for example, '-validateXml' is never mistaken for '-v...'.
        if option in booleanOptionTargets:
            moduleGlobals[ booleanOptionTargets[ option]] = trueOrFalse( getNextArg())
        elif option in verbatimOptionTargets:
            moduleGlobals[ verbatimOptionTargets[ option]] = getNextArg()
        elif option == '-about':
            showAbout = True
        elif option.startswith( '-rel'):
            showReleaseNotes = True
        elif option == '-encodings':
            showEncodings = True
        elif option.startswith( '-v'):
            showVersion = True
        elif option.startswith( '-h'):
            showUsage = True
        elif iemlExpressionString is None:
            ## Anything that is not an option is taken as the expression.
            iemlExpressionString = arg
        else:
            argErrorMsgList.append(
                'Multiple IEML expression strings were specified:\n(1) "%s"\n(2) "%s"\n' % (
                    iemlExpressionString,
                    arg,
                ),
            )
            break

        argCounter += 1

    ## Now, before anything else, determine the encodings
    for requestedName, direction, settingName in (
        ( inputEncoding, 'input', 'inputEncoding'),
        ( outputEncoding, 'output', 'outputEncoding'),
    ):
        normalizedName = normalizeEncodingName( requestedName)
        if normalizedName not in encodingsLookupDict:
            starparser_usage(
                'Encoding "%s" is not supported.  Sorry!  Try the -encodings option to see the supported encodings.' % (
                    requestedName,
                ),
                1,
            )
        canonicalName = encodingsLookupDict[ normalizedName]
        if canonicalName != requestedName:
            ## The user gave an alias; say so and switch to the canonical name.
            writeOutput(
                sys.stdout,
                'Using "%s" as the %s encoding; "%s" is one of its aliases.\n' % (
                    canonicalName,
                    direction,
                    requestedName,
                ),
                '-StarErrors',
            )
            if settingName == 'inputEncoding':
                inputEncoding = canonicalName
            else:
                outputEncoding = canonicalName

    starErrorsStreamFO = openStream( starErrorsStream, 'w', outputEncoding)  ## from now on, we're using the correct output stream and encoding for error reporting

    ## "Show something and exit" requests, in a fixed order of precedence.
    if showAbout:
        writeOutput( starErrorsStreamFO, STARPARSER_ABOUT, '-StarErrors')
        sys.exit( 0)
    if showReleaseNotes:
        writeOutput( starErrorsStreamFO, STARPARSER_RELEASENOTES, '-StarErrors')
        sys.exit( 0)
    if showVersion:
        writeOutput( starErrorsStreamFO, 'version %s\n' % ( STARPARSER_VERSION), '-StarErrors')
        sys.exit( 0)
    if showUsage:
        starparser_usage( '', 0)
    if showEncodings:
        doShowEncodings()
        sys.exit( 0)
    if argErrorMsgList:
        starparser_usage( '\n'.join( argErrorMsgList), 1)

    if binary:
        addImplicitSemes = True ## implicit semes must be present before binary values can be calculated.

    if binary and not collapseContainers:
        starparser_usage( '-binary cannot be True if -collapseContainers is False', 1)

    dtdWithPEsStreamFO = openStream( dtdWithPEsStream, 'w', outputEncoding)
    dtdStreamFO = openStream( dtdStream, 'w', outputEncoding)
    dtdOnlyForHtmlStreamFO = openStream( dtdOnlyForHtmlStream, 'w', outputEncoding)
    xmlErrorsStreamFO = openStream( xmlErrorsStream, 'w', outputEncoding)
    internalErrorsStreamFO = openStream( internalErrorsStream, 'w', outputEncoding)
    jsonStreamFO = openStream( jsonStream, 'w', None, json=True, )
    pickleStreamFO = openStream( pickleStream, 'w', None, pickle=True, )
    xmlStreamFO = openStream( xmlStream, 'w', outputEncoding)
    xmlAsParsedStreamFO = openStream( xmlAsParsedStream, 'w', outputEncoding)
    tokensStreamFO = openStream( tokensStream, 'w', outputEncoding)
    allTokensStreamFO = openStream( allTokensStream, 'w', outputEncoding)

    ## A DTD request is served on its own: write the DTD and stop.
    if dtdStreamFO is not None:
        writeOutput( dtdStreamFO, dtdStringAfterParameterEntityExpansion, '-dtd')
        sys.exit( 0)

    if dtdWithPEsStreamFO is not None:
        writeOutput( dtdWithPEsStreamFO, dtdStringAfterTemplateExpansion, '-dtdWithPEs')
        sys.exit( 0)

    if dtdOnlyForHtmlStreamFO is not None:
        ## Escape the DTD text so that it can be pasted into an HTML page.
        dtdForHtml = dtdStringAfterTemplateExpansion.replace( ' ', '&#160;')
        dtdForHtml = dtdForHtml.replace( '<', '&lt;')
        dtdForHtml = dtdForHtml.replace( '>', '&gt;')
        dtdForHtml = dtdForHtml.replace( '\n', '<br/>\n')
        writeOutput( dtdOnlyForHtmlStreamFO, dtdForHtml, '-dtdOnlyForHtmlStream')
        sys.exit( 0)

    ## Exactly one source of the Star expression must have been given.
    haveInputFile = inputFilePath is not None
    haveExpression = iemlExpressionString is not None
    if not haveInputFile and not haveExpression:
        starparser_usage( 'Neither an input file nor an IEML expression were specified.', 1)
    elif haveInputFile and haveExpression:
        starparser_usage( 'Both an input file and an IEML expression were specified.  Use one or the other, but not both.', 1)
    elif haveInputFile:
        inputFO = openStream( inputFilePath, 'r', inputEncoding)
        iemlExpressionString = readInput( inputFO)

    iemlExpressionString = normalizeRecordEnds( iemlExpressionString)

    ## for a clear tokenDisp display, the string to be displayed must
    ## be forced to begin with a newline.  Might as well add it here.
    iemlExpressionString = '\n%s' % ( iemlExpressionString)

    iemlExpressionToken = Token(
        tokenTypeNameToRegExpableChar[ 'iemlExpression'],
        makeListOfCharTokensFromString( iemlExpressionString),
    )

    tokenTweak()  ## parse and interpret the Star expression

    if tokensStreamFO is not None:
        writeOutput(
            tokensStreamFO,
            showToken( iemlExpressionToken, maxRecur = 999, recurSelectively=True,),
            '-tokens',
        )

    if allTokensStreamFO is not None:
        writeOutput(
            allTokensStreamFO,
            showToken( iemlExpressionToken, maxRecur = 999, recurSelectively=False,),
            '-allTokens',
        )

    if jsonStream or xmlStream or xmlAsParsedStream:
        xmlString = '%s\n' % ( tokenTree2Xml())

    if jsonStreamFO is not None:
        json.dump( jsonDict, jsonStreamFO)

    if pickleStreamFO is not None:
        ## The pickled token carries the expression only while it is dumped.
        iemlExpressionToken.expression = iemlExpressionString[ 1:]  ## get rid of the leading extra \n
        pickle.dump( iemlExpressionToken, pickleStreamFO)
        del iemlExpressionToken.expression

    if xmlStream or xmlAsParsedStream:
        ## final output will be assembled here, starting with the prologue
        xmlStringList = [
            xmlPrologueStr(
                includeDTD = True if includeDtd else False,
                encoding = outputEncoding,
            ),
        ]

        xmlBody = xmlString
        if validateXml or prettyXml:
            ## NOTE(review): this call spells the keyword "includeDtd" while
            ## the prologue call above spells it "includeDTD".  The signature
            ## of xmlPrologueStr is not visible here -- confirm that both
            ## spellings are accepted and have the intended effect.
            xmlAsParsedString = '%s%s' % (  ## parsable output
                xmlPrologueStr( includeDtd=True, encoding=None),
                xmlString,
            )
            writeOutput( xmlAsParsedStreamFO, xmlAsParsedString, '-xmlAsParsed')
            prettyPrintedXMLString = prettyPrintAndOrValidateXml( xmlAsParsedString)  ## parse it
            if prettyXml:
                xmlBody = prettyPrintedXMLString

        xmlStringList.append( xmlBody)
        writeOutput( xmlStreamFO, ''.join( xmlStringList), '-xml')



#######################################################
def genOpError( errToken, recogMO, errorMessageList):
    """Append a "Layer error" report about a matched seme sequence.

    errToken         -- token whose last zubTokenLists entry was searched.
    recogMO          -- the regular-expression match object; its span selects
                        the offending tokens, and the lengths of its groups
                        are used to locate the semes and the layer mark.
    errorMessageList -- list that receives the formatted report (appended).

    Groups 2, 5 and 8 are taken as the first, second and third seme, and
    group 10 as the layer mark; groups 1, 3, 6 and 9 are the material that
    is skipped over in front of them.  Groups 4 and 7 do not contribute to
    the offsets (presumably they are nested inside other groups -- TODO
    confirm against the recognizer pattern).
    """
    MOTokenList = errToken.zubTokenLists[ -1][ recogMO.start() : recogMO.end()]

    ## MOGroupLengths[ n] is the length of recogMO.group( n), or 0 for a
    ## group that did not participate.  The leading None is a placeholder so
    ## that list indexes equal group numbers.
    MOGroupLengths = [ None, ] + [
        0 if groupText is None else len( groupText)
        for groupText in recogMO.groups()
    ]

    def tokensOfGroup( precedingGroups, wantedGroup):
        ## Tokens matched by wantedGroup, found by skipping the combined
        ## length of precedingGroups from the start of the match.
        begin = sum( MOGroupLengths[ groupNumber] for groupNumber in precedingGroups)
        return MOTokenList[ begin : begin + MOGroupLengths[ wantedGroup]]

    def placeDisplay( firstChar, lastChar):
        ## The display of the given character range of the Star expression.
        return showPlaceInString(
            iemlExpressionString,
            firstChar,
            lastChar,
            1, 5,
            protectNewlines = False,
            xmlEscapeForAttributeValue = False,
        )

    semesListOfTokenLists = [
        tokensOfGroup( ( 1,), 2),             ## R1 seme.
        tokensOfGroup( ( 1, 2, 3), 5),        ## R2 seme.
        tokensOfGroup( ( 1, 2, 3, 5, 6), 8),  ## R3 seme.
    ]
    layerMarkTokenList = tokensOfGroup( ( 1, 2, 3, 5, 6, 8, 9), 10)

    reportParts = []
    for ordinalString, semeTokens in zip( ( 'first', 'second', 'third'), semesListOfTokenLists):
        try:
            semeLayer = semeTokens[ 0].layerNumber
        except IndexError:
            break  ## this seme is absent; later semes are not reported either
        reportParts.append(
            '     The %s seme is at layer %d%s\n' % (
                ordinalString,
                semeLayer,
                placeDisplay( semeTokens[ 0].first, semeTokens[ 0].last),
            )
        )

    layerMark = charStringFromCharTokenList( layerMarkTokenList)
    layerMarkNumber = layerMarkToLayerNumber[ layerMark ]
    layerMarkPlace = placeDisplay( layerMarkTokenList[ 0].first, layerMarkTokenList[ -1].last)
    if layerMarkNumber is None:
        ## No specific layer number is associated with this mark.
        reportParts.append(
            '     The terminating %s layer mark declares that 3 preceding (and possibly implicit) L(n) semes are a single category at L( n + 1).%s\n' % (
                layerMark,
                layerMarkPlace,
            )
        )
    else:
        reportParts.append(
            '     The terminating %s layer mark declares that 3 preceding (and possibly implicit) L%d semes are a single category at L%d.%s\n' % (
                layerMark,
                layerMarkNumber - 1,
                layerMarkNumber,
                layerMarkPlace,
            )
        )

    errorMessageList.append( 'Layer error.  Either these semes are not all\n     at the same layer, or they are not immediately\n     followed by a layer mark representing\n     the next higher layer, or both:%s\n%s\n' % (
        placeDisplay( MOTokenList[ 0].first, MOTokenList[ -1].last),
        ''.join( reportParts),
    ))



#######################################################
def anomalousError( errToken, recogMO, errorMessageList):
    """Append a report about operands that lack a usable layer mark/operator.

    errToken         -- token whose last zubTokenLists entry is examined.
    recogMO          -- the regular-expression match object for the anomaly;
                        its span is used only as a fallback location when no
                        operand is found.
    errorMessageList -- list that receives the formatted report (appended).

    Every token of errToken.zubTokenLists[ -1] whose tokenType is not
    'comment', 'instantiator' or 'whitespace' counts as an operand.  The
    wording of the report depends on how many operands were found, and the
    place display runs from the first operand to the last one.

    If no operand is found at all, a dedicated message is reported (the
    count of zero used to fall into the "more than the maximum of 3" wording
    and then fail with IndexError on the empty operand list).
    """
    allTokens = errToken.zubTokenLists[ -1]
    MOTokenList = allTokens[ recogMO.start() : recogMO.end()]

    ## Token types that never count as operands; looked up once.
    ignorableTokenTypes = (
        tokenTypeNameToRegExpableChar[ 'comment'],
        tokenTypeNameToRegExpableChar[ 'instantiator'],
        tokenTypeNameToRegExpableChar[ 'whitespace'],
    )
    operandList = [
        candidate
        for candidate in allTokens
        if candidate.tokenType not in ignorableTokenTypes
    ]
    foundHowMany = len( operandList)

    if foundHowMany == 0:
        thisOperandOrTheseOperands = 'No operands were found; there are only comments, instantiators and/or whitespace here'
    elif foundHowMany == 1:
        thisOperandOrTheseOperands = 'No contextually-appropriate layermark appears after this operand'
    elif foundHowMany == 2:
        thisOperandOrTheseOperands = 'No contextually-appropriate layermark appears after these 2 operands,\n     nor is there a contextually-appropriate operator between them'
    elif foundHowMany == 3:
        thisOperandOrTheseOperands = 'No contextually-appropriate layermark appears after these 3 operands'
    else:
        thisOperandOrTheseOperands = 'There are %d operands, which is more than the maximum of 3, and no\n     contextually-appropriate layermark appears after them' % ( foundHowMany)

    ## Point at the operands when there are any; otherwise at the matched
    ## tokens, or failing that at every token that was examined.
    tokensToShow = operandList or MOTokenList or allTokens
    if tokensToShow:
        placeDisplay = showPlaceInString(
            iemlExpressionString,
            tokensToShow[ 0].first,
            tokensToShow[ -1].last,
            1, 5,
            protectNewlines = False,
            xmlEscapeForAttributeValue = False,
        )
    else:
        placeDisplay = ''  ## nothing at all to point at

    errorMessageList.append( 'Error: %s:%s\n' % (
        thisOperandOrTheseOperands,
        placeDisplay,
    ))



#######################################################
def nonGenOpError( errToken, recogMO, errorMessageList):
    """Append a "Layer error" report about two operands at different layers.

    errToken         -- token whose last zubTokenLists entry was searched.
    recogMO          -- the regular-expression match object; group 1 is taken
                        as the first operand and group 5 as the second, with
                        groups 2, 3 and 4 lying between them.
    errorMessageList -- list that receives the formatted report (appended).

    The place display runs from the `first` position of the first token of
    the first operand to the `last` position of the first token of the
    second operand.
    """
    MOTokenList = errToken.zubTokenLists[ -1][ recogMO.start() : recogMO.end()]

    ## Lengths of groups 1 through 5, in order.
    (
        firstOperandLength,
        preOperatorLength,
        operatorLength,
        postOperatorLength,
        secondOperandLength,
    ) = [ len( recogMO.group( groupNumber)) for groupNumber in range( 1, 6)]

    secondOperandStart = firstOperandLength + preOperatorLength + operatorLength + postOperatorLength
    firstOperandTokens = MOTokenList[ : firstOperandLength]
    secondOperandTokens = MOTokenList[ secondOperandStart : secondOperandStart + secondOperandLength]

    layerLineFormat = '     The %s operand is at layer %d\n'

    ## First operand: its layer, and where the displayed span begins.
    firstOperandHead = firstOperandTokens[ 0]
    firstOperandLayer = firstOperandHead.layerNumber
    firstCharNumber = firstOperandHead.first
    firstOperandLine = layerLineFormat % ( 'first', firstOperandLayer)

    ## Second operand: its layer, and where the displayed span ends.
    secondOperandHead = secondOperandTokens[ 0]
    secondOperandLayer = secondOperandHead.layerNumber
    lastCharNumber = secondOperandHead.last
    secondOperandLine = layerLineFormat % ( 'second', secondOperandLayer)

    reportString = firstOperandLine + secondOperandLine

    errorMessageList.append( 'Layer error.  These operands are not both at the same layer: %s\n%s\n' % (
        showPlaceInString(
            iemlExpressionString,
            firstCharNumber,
            lastCharNumber,
            1, 5,
            protectNewlines = False,
            xmlEscapeForAttributeValue = False,
        ),
        reportString,
    ))



#######################################################
def tokenTweak():
    """Rewrite the token tree with the recognizers in ttDispatchList, check it, and finish it.

    In order, this function:
      1. applies ttDispatchList[ 0][ 0] to iemlExpressionToken (outer delimiters);
      2. runs the ttDispatchList[ 1] recognizers, then those of ttDispatchList[ 2],
         [ 3] and [ 4] together, each time until a whole pass changes nothing;
      3. verifies that every subtoken's parent attribute refers to the token holding it;
      4. collects error messages (ttDispatchList[ 7] recognizers, top-layer diagonals,
         unrecognized characters); if any exist, writes them to starErrorsStreamFO
         and exits with status 1;
      5. runs the ttDispatchList[ 5] recognizers, then ttDispatchList[ 6] if
         collapseContainers is set, then addImplicitSemesToEverything() if
         addImplicitSemes is set, then addBinaryAttributes() if binary is set.

    A recognizer ("recog") is indexed as follows: recog[ 0] is a collection of token
    types, recog[ 1] is an object with a search() method (presumably a compiled
    regular expression) applied to token.subTRxStr(), and recog[ 2] is the function
    called when the search succeeds.
    """
    global somethingChanged, iemlExpressionToken

    #######################################################
    def __tokenTweak( token, recog):
        """Apply one recognizer to one token.

        Resets the global somethingChanged to False, searches recog[ 1] against
        token.subTRxStr() and, on a match, stores token in the global workingToken
        and calls recog[ 2] with the match object.  somethingChanged becomes True
        only if that call returned True.  When tokenTweakDebug is set, the token is
        printed before and after and the debugger is entered.
        """
        global somethingChanged, workingToken
        
        somethingChanged = False

###  BEGIN DEBUG CODE
        if tokenTweakDebug:
##             foundz = False
##             for zubtoken in token.zubTokenLists[ -1]:
##                 if zubtoken.id == 517:
##                     foundz = True
##                     break
##             if foundz:
            if True:
                ## zCtr counts the calls made while debugging; it is created on first use
                global zCtr
                try:
                    zCtr += 1
                except NameError:
                    zCtr = 0
                print( '\nBEFORE: (%d)' % ( zCtr))
                printToken( token, maxRecur = 0)
###                print( 'recog[ 1].pattern = \n%s' % ( groupifyREPatternDisplay( tokenTypeStringToTokenTypeNameList( recog[ 1].pattern))))
                xMO = recog[ 1].search( token.subTRxStr())
                print( 'tokenTypeStringToTokenTypeNameList( token.subTRxStr()): %s' % ( show( tokenTypeStringToTokenTypeNameList( token.subTRxStr()))))
#                sys.stdout.write( ( 'xMO: %s\n' % ( str( show( xMO, '').encode( 'us-ascii', 'replace')))).replace( '\\n', '\n').replace( '\n\n', '\n'))
                print( 'xMO: %s' % ( showMO( xMO, recog[ 1].pattern)))
                print( 'recog[ 2] = %s' % ( recog[ 2]))
                ## print the groups one by one; the bare except ends the loop on the
                ## first group number that cannot be shown (or when xMO is None)
                x = 1
                while True:
                    try:
                        print( 'group( %d) == %s' % ( x, tokenTypeStringToTokenTypeNameList( xMO.group( x))))
                    except:
                        break
                    x += 1

                try:
                    if recog[ 2] == nonGenOpProc and token.zubTokenLists[ -1][ 1].tokenTypeName() == 'char_|':
                        import pdb
                        pdb.set_trace()
                        print()
                except IndexError:
                    pass
            import pdb
            pdb.set_trace()
            pass
###  END DEBUG CODE
                
        recogMO = recog[ 1].search( token.subTRxStr())
        if not recogMO:
            return
        ## the recognizer functions take only the match object; they find the
        ## token to work on through the global workingToken
        workingToken = token
        retval = recog[ 2]( recogMO)  ## call the function appropriate for the matched pattern

###  BEGIN DEBUG CODE
        if tokenTweakDebug:
##             foundz = False
##             for zubtoken in token.zubTokenLists[ -1]:
##                 if zubtoken.id == 517:
##                     foundz = True
##                     break
##             if foundz:
            if True:
                print( '\nAFTER:  (retval == %s)' % ( retval))
                printToken( workingToken, maxRecur = 0)
                try:
                    if recog[ 2] == nonGenOpProc:
                        import pdb
                        pdb.set_trace()
                        print()
                except IndexError:
                    pass

            import pdb
            pdb.set_trace()
            pass
###  END DEBUG CODE

        if retval == True:
            somethingChanged = True


    #######################################################
    def _tokenTweak( listOfDispatchLists):
        """Apply every recognizer of the given dispatch lists until a full pass changes nothing.

        The dispatch lists are concatenated into one list.  For each recognizer and
        each of its token types (in sorted order), every token of that type found
        in tokensDict[ 'tokenType'] is passed to __tokenTweak().  As soon as one
        call reports a change, the same token type is started over with a fresh
        list of token ids.
        """
        global somethingChanged, iemlExpressionToken

        thisDispatchList = []
        for dispatchList in listOfDispatchLists:
            thisDispatchList.extend( dispatchList)

        while True:
            somethingChanged2 = False  ## becomes True if anything changes during this pass
            for recog in thisDispatchList:
                procTokenTypeCtr = -1
                while procTokenTypeCtr < ( len( recog[ 0]) - 1):
                    procTokenTypeCtr += 1
                    somethingChanged = False
                    ## the sorted key list is rebuilt on every iteration
                    procTokenType = sorted( list( recog[ 0].keys()))[ procTokenTypeCtr]
                    if procTokenType in tokensDict[ 'tokenType']:
                        try:
                            ## snapshot of the ids of the tokens of this type, taken before any change
                            procTokenIds = sorted( tokensDict[ 'tokenType'][ procTokenType].keys())
                            for procTokenId in procTokenIds:
                                ## every path through this loop body exits, raises or breaks,
                                ## so it runs once per token id
                                while True:
                                    if procTokenId not in tokensDict[ 'tokenType'][ procTokenType]:
                                        errMsg( 'internal error; token %s not in tokensDict[ \'tokenType\'][ %s]' % (
                                            procTokenId,
                                            procTokenType,
                                        ))
                                        if tokenTweakDebug:
                                            import pdb
                                            pdb.set_trace()
                                        sys.exit( 1)
                                    else:
                                        __tokenTweak( tokensDict[ 'tokenType'][ procTokenType][ procTokenId], recog)
                                    if somethingChanged:
                                        ## somethingChanged3 is assigned here but not read anywhere in this function
                                        somethingChanged3 = somethingChanged2 = True
                                        ## abandon the id snapshot, which may now be stale
                                        raise TokenTreeChangedException
                                    else:
                                        break
                        except TokenTreeChangedException:
                            ## step back so that the loop's increment selects this same token type again
                            procTokenTypeCtr -= 1
            if not somethingChanged2: break


    #######################################################
    
    __tokenTweak( iemlExpressionToken, ttDispatchList[ 0][ 0])  ## handle outer delimiters, if any

    _tokenTweak( [ ttDispatchList[ 1]])                       ## comments, instantiators, whitespace, category separators
    _tokenTweak( [
        ttDispatchList[ 2], ## containers: categoryExpression, group, undeterminedSubsetOf, diagonal
        ttDispatchList[ 3], ## L0 and L1 symbols
        ttDispatchList[ 4], ## genOps and nonGenOps
    ])

    ## special check to inspect parent/child relations, to detect internal errors, if any
    tokensByIds = {}
    tokensByIds.update( tokensDict[ 'id'])  ## work on a copy of the id index
    tokenIds = sorted( list( tokensByIds.keys()))
    for tokenId in tokenIds:
        thisToken = tokensByIds[ tokenId]
        ## 'zubTokenLists' is commented out of this list, so the branch that
        ## handles it below is never taken
        for attributeName in [
#            'zubTokenLists',
            'complexAtL0',
            'complexAtL1',
            'complexAtL2',
            'complexAtL3',
            'complexAtL4',
            'complexAtL5',
            'complexAtL6',
            'semes',
            'operands',
        ]:
            if attributeName == 'zubTokenLists':
                subtokenList = thisToken.zubTokenLists[ -1]
            else:
                if not hasattr( thisToken, attributeName): continue
                subtokenList = getattr( thisToken, attributeName)
            for subtoken in subtokenList:
                if subtoken.parent != thisToken:
                    errMsg( 'internal error: parent/child inconsistency')
                    if tokenTweakDebug:
                        printToken( subtoken)
                        import pdb
                        pdb.set_trace()
                    sys.exit( 1)

##################################################################
## checking happens here, before telescoping (and before       ###
## sorting the categoryExpressions in the usl, if appropriate) ###
##################################################################

    errorMessageList = []  ## local to tokenTweak; the nested detect* functions below append to it

    ## the recognizers in ttDispatchList[ 7] are error detectors: unlike the
    ## others, their functions are called with ( token, match object, errorMessageList)
    for recog in ttDispatchList[ 7]:
        for tokenTypeChar in recog[ 0]:
            if tokenTypeChar in tokensDict[ 'tokenType']:
                for tokenId in tokensDict[ 'tokenType'][ tokenTypeChar]:
                    errToken = tokensDict[ 'tokenType'][ tokenTypeChar][ tokenId]
                    recogMO = recog[ 1].search( errToken.subTRxStr())
                    if recogMO:
                        recog[ 2]( errToken, recogMO, errorMessageList)

    #######################################################
    def detectTopLayerDiagonals():
        """Append an error message for every token of type 'diagonalAtL<layerCount - 1>'."""
        if tokenTypeNameToRegExpableChar[ 'diagonalAtL%d' % ( layerCount - 1)] in tokensDict[ 'tokenType']:
            for diagonalAtTopLayerTokenId in tokensDict[ 'tokenType'][ tokenTypeNameToRegExpableChar[ 'diagonalAtL%d' % ( layerCount - 1)]]:
                errToken = tokensDict[ 'tokenType'][ tokenTypeNameToRegExpableChar[ 'diagonalAtL%d' % ( layerCount - 1)]][ diagonalAtTopLayerTokenId]
                errorMessageList.append( 'diagonal operations cannot occur at layer %d.%s\n' % (
                    layerCount - 1,
                    showPlaceInString(
                        iemlExpressionString,
                        errToken.first,
                        errToken.last,
                        1, 5,
                        protectNewlines = False,
                        xmlEscapeForAttributeValue = False,
                    ),
                ))
    #######################################################
    detectTopLayerDiagonals()

    #######################################################
    def detectUnrecognizedChars():
        """Append one error message that marks the characters that were not understood.

        A position of iemlExpressionString is marked when a token still indexed in
        tokensDict[ 'tokenType'] has a type whose ordinal lies between
        ordinalOfLeastChar and MAXUNICODEVALUE (presumably a character token that
        no recognizer consumed -- TODO confirm).
        """

        ## one flag per character of the expression; True marks a bad character
        badCharMap = []
        foundBad = 0  ## counts the offending tokens
        for j in range( len( iemlExpressionString)):
            badCharMap.append( False)
        for tokenTypeKey in tokensDict[ 'tokenType']:
            if ( ord( tokenTypeKey) >= ordinalOfLeastChar) and ( ord( tokenTypeKey) <= MAXUNICODEVALUE):
                for tokenId in tokensDict[ 'tokenType'][ tokenTypeKey]:
                    badCharMap[ tokensDict[ 'tokenType'][ tokenTypeKey][ tokenId].first] = True
                    foundBad += 1
        if foundBad > 0:
            ## the two branches differ only in the singular/plural wording of the message
            if foundBad == 1:
                errorMessageList.append( 'This character is not understood:%s\n' % (
                    showPlaceInString(
                        iemlExpressionString,
                        0,
                        0,
                        1, 5,
                        badCharMap = badCharMap,
                        protectNewlines = False,
                        xmlEscapeForAttributeValue = False,
                    ),
                ))
            else:
                errorMessageList.append( 'These characters are not understood:%s\n' % (
                    showPlaceInString(
                        iemlExpressionString,
                        0,
                        0,
                        1, 5,
                        badCharMap = badCharMap,
                        protectNewlines = False,
                        xmlEscapeForAttributeValue = False,
                    ),
                ))
    #######################################################
    detectUnrecognizedChars()

    ## report errors here
    if len( errorMessageList) > 0:
        writeOutput( starErrorsStreamFO, '\nERRORS:\n\n', '-StarErrors')
        errMsgNumber = 0  ## messages are numbered from 1 in the output
        for errorMessage in errorMessageList:
            errMsgNumber += 1
            writeOutput( starErrorsStreamFO, '%3d  %s\n' % (
                errMsgNumber,
                errorMessage,
                ),
                '-StarErrors',
            )
        sys.exit( 1)

######################
## end of checking ###
######################

    _tokenTweak( [ ttDispatchList[ 5],])   ## usl processing: sort the category expressions into their layers.

    if collapseContainers:
        _tokenTweak( [ ttDispatchList[ 6],])   ## collapse ("telescope") the containers

    if addImplicitSemes:
        addImplicitSemesToEverything()  ## add implicit semes

    if binary:
        addBinaryAttributes()     ## This is the last thing we do.


####################################################################################################
## The four patterns below are used by outerDelimsProc(), which applies them to
## the string returned by token.subTRxStr().  All are compiled with re.DOTALL,
## so '.' also matches a newline.  Each charRecogStr( ...) result is inserted
## into the pattern text as is (presumably already in a form safe for a regular
## expression -- TODO confirm).

## A fully delimited expression, in three groups:
##   1: optional whitespace, the opening delimiter star_expression[ 0], optional whitespace (from the start);
##   2: the shortest possible run of anything;
##   3: optional whitespace, the closing delimiter star_expression[ 1], optional whitespace (to the end).
outerMarkupRE = re.compile( '(^[%s]*%s[%s]*)(.*?)([%s]*%s[%s]*$)' % (
        charRecogStr( star_whitespace),
        charRecogStr( star_expression[ 0]),
        charRecogStr( star_whitespace),

        charRecogStr( star_whitespace),
        charRecogStr( star_expression[ 1]),
        charRecogStr( star_whitespace),
    ),
    re.DOTALL,
)
## An expression without delimiters, in three groups: leading whitespace,
## content (shortest possible), trailing whitespace.  Every group may be empty.
outerWhitespaceRE = re.compile( '(^[%s]*)(.*?)([%s]*$)' % (
        charRecogStr( star_whitespace),
        charRecogStr( star_whitespace),
    ),
    re.DOTALL,
)
## Only the opening delimiter at the start, in three groups: leading whitespace,
## the delimiter star_expression[ 0], following whitespace.
outerMarkupStartRE = re.compile( '(^[%s]*)(%s)([%s]*)' % (
        charRecogStr( star_whitespace),
        charRecogStr( star_expression[ 0]),
        charRecogStr( star_whitespace),
    ),
    re.DOTALL,
)
## Only the closing delimiter at the end, as a single group: optional whitespace,
## the delimiter star_expression[ 1], optional whitespace.
outerMarkupEndRE = re.compile( '([%s]*%s[%s]*$)' % (
        charRecogStr( star_whitespace),
        charRecogStr( star_expression[ 1]),
        charRecogStr( star_whitespace),
    ),
    re.DOTALL,
)

#######################################################
def binsc_primitive_operation( token):
    """Compute the binary value of a primitive set operation from its first two operands.

    token -- operation token; token.operands, token.operator, token.first and
             token.last are read.  Each operand's binary attribute must already
             be set (None, or a collection of byte strings).

    Returns a frozenset holding one bytes object, built by combining the single
    member of each of the first two operands byte by byte:
        star_primitive_union         ->  |
        star_primitive_difference    ->  ^
        star_primitive_intersection  ->  &
    Returns None if any operand's binary value is None, or if an operand does
    not hold exactly one member (a message is then appended to
    errorMessageList).  Exits with status 1 on any other operator.
    """
    global iemlExpressionString

    ## first, check to be sure that each operand is exactly one binsc
    operandList = token.operands
    for operandCounter, operand in enumerate( operandList, 1):
        if operand.binary is None:
            return None
        if len( operand.binary) != 1:
            ## NOTE(review): errorMessageList is not defined inside this function; it
            ## must exist at module level for this report to work -- confirm.
            errorMessageList.append( 'operand %d has %d members.  In %s operations, each operand must be a set containing exactly one member.%s\n' % (
                operandCounter,
                len( operand.binary),  ## fills the second %d, which previously had no argument
                operatorSymbolToOperationName[ token.operator],
                showPlaceInString(
                    iemlExpressionString,
                    token.first,
                    token.last,
                ),
            ))
            return None  ## this is an error

    firstBinsc = bytearray( list( operandList[ 0].binary)[ 0])
    secondBinsc = bytearray( list( operandList[ 1].binary)[ 0])

    if token.operator == star_primitive_union:
        combine = lambda a, b: a | b
    elif token.operator == star_primitive_difference:
        ## NOTE(review): XOR yields the symmetric difference -- confirm that this is
        ## the intended meaning of "difference".
        combine = lambda a, b: a ^ b
    elif token.operator == star_primitive_intersection:
        combine = lambda a, b: a & b
    else:
        errMsg( 'internal error')
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    ## the length of the first operand drives the loop, as before; a shorter
    ## second operand raises IndexError
    newBytearray = bytearray()
    for ctr in range( len( firstBinsc)):
        newBytearray.append( combine( firstBinsc[ ctr], secondBinsc[ ctr]))

    return frozenset(
        [
            bytes( newBytearray),
        ],
    )



#######################################################
def binsc_sequence_operation( token):
    """Record that the binary value of a sequence operation cannot be calculated.

    token -- operation token; only token.first and token.last are read, to
             show where the operation sits in iemlExpressionString.

    Appends one message to the global list sequenceOperationMessages and
    always returns None.
    """
    global sequenceOperationMessages, iemlExpressionString

    recordMessage = sequenceOperationMessages.append
    placeInExpression = showPlaceInString(
        iemlExpressionString,
        token.first,
        token.last,
    )
    recordMessage(
        'Calculation of binary values of sequence operations is not yet supported.%s' % (
            placeInExpression,
        )
    )
    return None

#######################################################
def addBinaryAttributes( token = None):
    """Recursively compute and store the binary value of a token and of the tokens below it.

    token -- the token to process; when None, the global iemlExpressionToken is used.

    What is stored depends on token.tokenTypeName():
      genOp...        token.binary, from its three semes (IemlTrio) or from its
                      primitiveSet; None if any seme has no binary value or is
                      contained in a diagonal or undetermined subset.
      primitive... /  token.binary, from binsc_primitive_operation() or
      sequence...     binsc_sequence_operation(), chosen by token.operator.
      containers      token.binary, taken from the single content token; None when
                      there is not exactly one content token, when that token has
                      no binary value, or for undetermined subsets and diagonals.
      usl             token.uslBinary, a list with one entry per layer (a set, or
                      None), and a complexBinAtL<n> attribute for every layer
                      that has a complexAtL<n> attribute.

    Returns the value stored.  Exits with status 1 on an unexpected token type,
    an unrecognized operator, or a genOp that does not have exactly 3 semes.
    """
    global iemlExpressionToken

    if token is None:
        token = iemlExpressionToken

    tokenTypeName = token.tokenTypeName()
    if tokenTypeName.startswith( 'genOp'):    ## all genOps here

        if hasattr( token, 'semes'):
            if len( token.semes) != 3:
                ## the count is now supplied; the message used to show a literal "%d"
                errMsg( 'genOp has %d semes.  It should have 3.  Did the parser fail to add the implicit semes?  Check the -addImplicitSemes option on the parser.' % (
                    len( token.semes),
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)
            iemlBList = []
            binaryShouldBeNone = False
            for seme in token.semes:
                addBinaryAttributes( seme)  ## every seme is processed, even once the result is known to be None

                if hasattr( seme, 'containedIn'):
                    for container in seme.containedIn:
                        if container.tokenTypeName().startswith( ( 'diagonal', 'undeterminedSubsetOf')):
                            binaryShouldBeNone = True
                            break
                if seme.binary is None or binaryShouldBeNone:
                    binaryShouldBeNone = True
                else:
                    iemlBList.append( seme.binary)
            if binaryShouldBeNone:
                token.binary = None
            else:
                token.binary = IemlSolo( IemlTrio( iemlBList)).binscs
            return token.binary

        elif hasattr( token, 'primitiveSet'):
            ## OR together the values of all the primitives in the set
            primVal = 0
            for primChar in list( token.primitiveSet):
                primVal = primVal | primCharToBinValue[ primChar]
            token.binary = IemlSolo( primVal).binscs
            return token.binary

        else:
            errMsg( '%s has neither semes nor primitiveSet' % ( tokenTypeName))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

    elif tokenTypeName.startswith( ( 'primitive', 'sequence')):  ## all nonGenOps here
        binaryShouldBeNone = False
        for operand in token.operands:
            addBinaryAttributes( operand)
            if operand.binary is None:
                binaryShouldBeNone = True
        if binaryShouldBeNone:
            token.binary = None  ## overwritten below for every recognized operator

        if token.operator in [
            star_primitive_union,
            star_primitive_intersection,
            star_primitive_difference,
        ]:
            token.binary = binsc_primitive_operation( token)

        elif token.operator in [
            star_sequence_intersection,
            star_sequence_difference,
            star_sequence_union,
#            star_sequence_subtraction,
        ]:
            token.binary = binsc_sequence_operation( token)

        else:
            ## token.operator replaces the bare name "operator", which is not defined in this function
            errMsg( 'unrecognized operator: %s' % ( token.operator))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        return token.binary

    elif tokenTypeName.startswith( (             ## all containers here
        'categoryExpressionAtL',
        'groupAtL',
        'undeterminedSubsetOfAtL',
        'diagonalAtL',
    )):
        binaryShouldBeNone = False
        for content in token.zubTokenLists[ -1]:
            addBinaryAttributes( content)  ## even if len( token.zubTokenLists[ -1]) > 1, we still need to calculate the binary values of the content tokens
            if content.binary is None:
                binaryShouldBeNone = True
        if len( token.zubTokenLists[ -1]) != 1:
            binaryShouldBeNone = True  ## because calculation in this case is undefined.  Can't have more than one thing in a group.
        elif tokenTypeName.startswith( ( 'undeterminedSubsetOfAtL', 'diagonalAtL')):
            binaryShouldBeNone = True  ## because:
                                       ## in the case of undetermined subsets, we haven't determined the subset, so we can't calculate.
                                       ## in the case of diagonals, we can't yet support calculations involving them.
        if binaryShouldBeNone:
            token.binary = None
        else:
            token.binary = token.zubTokenLists[ -1][ 0].binary
        return token.binary

    elif tokenTypeName == 'usl':  ## compared by value; "is" tested object identity and could miss an equal string
        token.uslBinary = [ set() for j in range( layerCount)]  ## one for each layer
        ## NOTE(review): the previous code carried a flag meant to turn the whole
        ## uslBinary into None, but nothing ever set it; uslBinary therefore
        ## always stays a list.  Confirm that this is the intent.
        for j in range( layerCount):
            complexAttName = 'complexAtL%d' % ( j)
            complexBinAttName = 'complexBinAtL%d' % ( j)
            if not hasattr( token, complexAttName):
                continue
            complexBinary = token.uslBinary[ j]
            setattr( token, complexBinAttName, complexBinary)
            for category in getattr( token, complexAttName):
                addBinaryAttributes( category)
                if category.binary is None:
                    ## one category without a binary value voids the whole layer
                    token.uslBinary[ j] = None
                    setattr( token, complexBinAttName, None)
                if token.uslBinary[ j] is not None:
                    complexBinary.update( category.binary)  ## sequence union ( + ) is implied
        return token.uslBinary

    else:
        errMsg( 'unexpected token type: "%s"' % ( tokenTypeName))
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

#######################################################
def charStringFromCharTokenList( charTokenList, **kwargs):
    """Rebuild the text that a list of tokens stands for.

    charTokenList -- the tokens to convert.
    kwargs        -- instantiatorWithComments: required if the list holds a comment
                     token; if true, the comment is rendered between star_comment[ 0]
                     and star_comment[ 1], if false it is left out.
                     showWhitespace: if true, whitespace tokens contribute the text
                     of their subtokens; otherwise they are left out.

    A token whose type ordinal is at least ordinalOfLeastChar contributes the
    character chr( ord( tokenType) - numberOfNonChrTokenTypes).  A category
    separator token contributes star_categorysep.  Any other token type, or a
    comment token without the instantiatorWithComments keyword, is an internal
    error and ends the program with status 1.

    Returns the joined string, or None for an empty list.
    """
    if len( charTokenList) < 1:
        return None   ## thus, for example, token.parameterIdentifier will be set to None if there is no parameter identifier.

    pieces = []
    for tok in charTokenList:
        kind = tok.tokenType

        ## ordinary character tokens: the common case
        if not ( ord( kind) < ordinalOfLeastChar):
            pieces.append( chr( ord( kind) - numberOfNonChrTokenTypes))
            continue

        if kind == tokenTypeNameToRegExpableChar[ 'comment']:
            if 'instantiatorWithComments' not in kwargs:
                ## the caller has to say what to do with comments
                errMsg( 'internal error')
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)
            if kwargs[ 'instantiatorWithComments']:
                pieces.append( star_comment[ 0])
                pieces.append( tok.text)
                pieces.append( star_comment[ 1])

        elif kind == tokenTypeNameToRegExpableChar[ 'whitespace']:
            if 'showWhitespace' in kwargs and kwargs[ 'showWhitespace']:
                for wsChar in tok.zubTokenLists[ -1]:
                    pieces.append( wsChar.text)

        elif kind == tokenTypeNameToRegExpableChar[ 'categorySeparator']:
            pieces.append( star_categorysep)

        else:
            errMsg( 'internal error')
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

    return ''.join( pieces)

#######################################################
def outerDelimsProc( MO):
    """Split the subtokens of workingToken into prologue, content and epilogue.

    MO -- match object supplied by the dispatcher; it is not used, because this
          function applies its own patterns to workingToken.subTRxStr().

    If the token is fully delimited (outerMarkupRE matches), the delimiters and
    the whitespace around them form the prologue and the epilogue.  If it has no
    delimiter at all, only the outer whitespace does.  An opening delimiter
    without a closing one, or the reverse, is reported on starErrorsStreamFO and
    ends the program with status 1.

    Non-empty prologue/epilogue token lists are stored as token.prologueTokens /
    token.epilogueTokens (with their text in prologueText / epilogueText), and
    the content tokens are appended to token.zubTokenLists as the new current
    list.  The first subtoken (the artificial leading newline) is removed from
    tokensDict and kept in none of the lists.

    Returns False always, including when token.prologueTokens already exists,
    in which case nothing is done.
    """

    global workingToken, iemlExpression
    token = workingToken

    if hasattr( token, 'prologueTokens'): return False

    rxstr = token.subTRxStr()

    splitMO = outerMarkupRE.search( rxstr)

    if not splitMO:  ## it doesn't match the *...** model
        outerMarkupStartMO = outerMarkupStartRE.search( rxstr)
        outerMarkupEndMO = outerMarkupEndRE.search( rxstr)
        if outerMarkupStartMO:
            ## '-StarErrors' is the third argument of writeOutput, as in the other
            ## error reports; it used to sit inside the formatting tuple, where it
            ## made the '%' operation fail.
            writeOutput(
                starErrorsStreamFO,
                'Expression begins with a \'*\' but does not end with \'**\'.%s' % (
                    showPlaceInString(
                        iemlExpressionString,
                        outerMarkupStartMO.start(),
                        outerMarkupStartMO.end(),
                        protectNewlines = False,
                        xmlEscapeForAttributeValue = False,
                    ),
                ),
                '-StarErrors',
            )
            sys.exit( 1)
        elif outerMarkupEndMO:
            writeOutput(
                starErrorsStreamFO,
                'Expression does not begin with a \'*\' but ends with \'**\'.%s' % (
                    showPlaceInString(
                        iemlExpressionString,
                        outerMarkupEndMO.start(),
                        outerMarkupEndMO.end(),
                        protectNewlines = False,
                        xmlEscapeForAttributeValue = False,
                    ),
                ),
                '-StarErrors',
            )
            sys.exit( 1)
        ## it's consistent.  Neither prologue or epilogue markup exists.
        splitMO = outerWhitespaceRE.match( rxstr)

    ## Both patterns put the prologue in group 1 and the content in group 2, so
    ## the same slicing serves the delimited and the undelimited case.
    prologueEnd = len( splitMO.group( 1))
    contentEnd = prologueEnd + len( splitMO.group( 2))

    prologueTokens = token.zubTokenLists[ -1][ 1 : prologueEnd]  ## start at 1 rather than 0 in order to avoid storing the artificial leading newline.
    token.zubTokenLists[ -1][ 0].removeSelfFromTokensDict()  ## get rid of the artificial leading newline
    newSubTokens = token.zubTokenLists[ -1][ prologueEnd : contentEnd]
    epilogueTokens = token.zubTokenLists[ -1][ contentEnd : ]

    if len( prologueTokens) > 0:
        token.prologueTokens = setAttributeOfTokensInList( prologueTokens, 'parent', token, removeSelfFromTokensDict = True)  ## actually, they already have token as their parent; it's an edge case.
        token.prologueText = charStringFromCharTokenList( prologueTokens)
    if len( epilogueTokens) > 0:
        token.epilogueTokens = setAttributeOfTokensInList( epilogueTokens, 'parent', token, removeSelfFromTokensDict = True)  ## actually, they already have token as their parent; it's an edge case.
        token.epilogueText = charStringFromCharTokenList( epilogueTokens)

    token.zubTokenLists.append( newSubTokens)  ## no need to reset first and last; they are still correct for their purpose

    return False  ## do not bother to repeat this function

#######################################################
def commentProc( MO):
    """Replace a matched comment (delimiters and text) with a single 'comment' token.

    MO -- match object over workingToken's current subtoken list: group 1 spans
          the start delimiter, group 2 the comment text, and the rest of the
          match the end delimiter.

    The new token keeps the matched tokens in startDelimiterTokens,
    endDelimiterTokens and originalTokens, all re-parented to it; the matched
    tokens are removed from tokensDict.  A new subtoken list, in which the
    comment token stands for the matched tokens, is appended to
    workingToken.zubTokenLists.

    Returns True, which reports a change to the caller.
    """

    global workingToken
    token = workingToken

    currentSubTokens = token.zubTokenLists[ -1]
    matchStart = MO.start()
    matchEnd = MO.end()

    tokensBefore = currentSubTokens[ : matchStart]
    commentTokenList = currentSubTokens[ matchStart : matchEnd]
    tokensAfter = currentSubTokens[ matchEnd : ]

    ## offsets of the three parts inside the matched span
    textStart = len( MO.group( 1))
    textEnd = textStart + len( MO.group( 2))
    openingTokens = commentTokenList[ : textStart]
    textTokens = commentTokenList[ textStart : textEnd]
    closingTokens = commentTokenList[ textEnd : ]

    spanFirst = commentTokenList[ 0].first
    spanLast = commentTokenList[ -1].last
    commentToken = Token(
        'comment',
        [],  ## nothing left to process here; saved as originalTokens (see below)
        parent = token,
        text = charStringFromCharTokenList( textTokens),
        first = spanFirst,
        last = spanLast,
    )

    ## the delimiters are re-parented only; their removal from tokensDict
    ## happens with the originalTokens call further down
    commentToken.startDelimiterTokens = setAttributeOfTokensInList(
        openingTokens, 'parent', commentToken, removeSelfFromTokensDict = False,
    )
    commentToken.startDelimiterText = charStringFromCharTokenList( openingTokens)

    commentToken.endDelimiterTokens = setAttributeOfTokensInList(
        closingTokens, 'parent', commentToken, removeSelfFromTokensDict = False,
    )
    commentToken.endDelimiterText = charStringFromCharTokenList( closingTokens)

    ## this is where all the matched tokens get removed from tokensDict
    commentToken.originalTokens = setAttributeOfTokensInList(
        commentTokenList, 'parent', commentToken, removeSelfFromTokensDict = True,
    )

    replacement = list( tokensBefore)
    replacement.append( commentToken)
    replacement.extend( tokensAfter)
    token.zubTokenLists.append( replacement)

    return True

#######################################################
def instantiatorProc( MO):
    """Replace a matched instantiator (delimiters and content) with a single 'instantiator' token.

    MO -- match object over workingToken's current subtoken list: group 1 spans
          the start delimiter, group 2 the content, and the rest of the match
          the end delimiter.

    The new token carries:
      text / textWithDelimitedComments -- the content without / with its comments;
      startDelimiterTokens, endDelimiterTokens, originalTokens -- the matched tokens;
      content -- the content as a list in which every run of char_ tokens has
                 become one 'instantiatorContent' token, and every other token
                 (presumably a comment) is kept as it is.
    A new subtoken list, in which the instantiator token stands for the matched
    tokens, is appended to workingToken.zubTokenLists.

    Returns True, which reports a change to the caller.
    """

    global workingToken
    token = workingToken

    preInstantiatorTokenList = token.zubTokenLists[ -1][ : MO.start()]
    instantiatorTokenList = token.zubTokenLists[ -1][ MO.start() : MO.end()]
    postInstantiatorTokenList = token.zubTokenLists[ -1][ MO.end() : ]

    contentStart = len( MO.group( 1))
    contentEnd = contentStart + len( MO.group( 2))
    instantiatorStartDelimiterTokens = instantiatorTokenList[ : contentStart]
    instantiatorTokens = instantiatorTokenList[ contentStart : contentEnd]
    instantiatorEndDelimiterTokens = instantiatorTokenList[ contentEnd : ]

    instantiatorToken = Token(
        'instantiator',
        [],
        textWithDelimitedComments = charStringFromCharTokenList( instantiatorTokens, instantiatorWithComments = True),
        text = charStringFromCharTokenList( instantiatorTokens, instantiatorWithComments = False),
        first = instantiatorTokenList[ 0].first,
        last = instantiatorTokenList[ -1].last,
    )

    instantiatorToken.startDelimiterTokens = setAttributeOfTokensInList(
        instantiatorStartDelimiterTokens,
        'parent',
        instantiatorToken,
        removeSelfFromTokensDict = True,
    )
    instantiatorToken.startDelimiterText = charStringFromCharTokenList( instantiatorStartDelimiterTokens)

    instantiatorToken.endDelimiterTokens = setAttributeOfTokensInList(
        instantiatorEndDelimiterTokens,
        'parent',
        instantiatorToken,
        removeSelfFromTokensDict = True,
    )
    instantiatorToken.endDelimiterText = charStringFromCharTokenList( instantiatorEndDelimiterTokens)

    instantiatorToken.originalTokens = setAttributeOfTokensInList(
        instantiatorTokenList,
        'parent',
        instantiatorToken,
        removeSelfFromTokensDict = False,  ## the delimiters were removed above; the content tokens are removed below
    )

    ## below we set instantiatorToken.content
    contentTokenList = []
    charTokenList = []  ## the run of char_ tokens seen since the last flush

    def flushCharTokens():
        ## Turn the pending run of char_ tokens, if any, into one instantiatorContent token.
        ## Both ends of the span come from the run itself.  "first" used to be taken from
        ## whichever token followed the previous flush, so that a leading comment, or two
        ## comments in a row, gave the next content token the position of a comment.
        if len( charTokenList) == 0:
            return
        contentTokenList.append(
            Token(
                'instantiatorContent',
                [],
                first = charTokenList[ 0].first,
                last = charTokenList[ -1].last,
                text = charStringFromCharTokenList( charTokenList),
            )
        )
        del charTokenList[ :]

    for iToken in instantiatorTokens:
        if iToken.tokenTypeName().startswith( 'char_'):
            charTokenList.append( iToken)
            iToken.removeSelfFromTokensDict()  ## char_... tokens are removed here
        else:  ## presumably this is a comment
            flushCharTokens()
            contentTokenList.append( iToken)
    flushCharTokens()

    instantiatorToken.content = setAttributeOfTokensInList(
        contentTokenList,
        'parent',
        instantiatorToken,
        removeSelfFromTokensDict = True,  ## the new instantiatorContent token(s) and any comment token(s) are removed here.
    )
    ## above we set instantiatorToken.content

    stList = []
    stList.extend( preInstantiatorTokenList)
    stList.append( instantiatorToken)
    stList.extend( postInstantiatorTokenList)

    token.zubTokenLists.append( stList)

    return True

#######################################################
def whitespaceProc( MO):
    """
    Collapse the matched run of tokens into a single 'whitespace' token.

    MO.start() and MO.end() are used as indexes into the working token's
    most recent subtoken list (zubTokenLists[ -1]).  A new 'whitespace'
    Token takes the matched tokens as its originalTokens, and a fresh
    subtoken list in which that Token stands in for them is appended to
    zubTokenLists.

    Always returns True.
    """

    global workingToken
    container = workingToken

    ## split the current subtoken list around the match
    matchStart, matchEnd = MO.start(), MO.end()
    leadingTokens = container.zubTokenLists[ -1][ : matchStart]
    matchedTokens = container.zubTokenLists[ -1][ matchStart : matchEnd]
    trailingTokens = container.zubTokenLists[ -1][ matchEnd : ]

    replacement = Token(
        'whitespace',
        [],
        text = charStringFromCharTokenList( matchedTokens),
        first = matchedTokens[ 0].first,
        last = matchedTokens[ -1].last,
    )
    ## re-parent the matched tokens to the replacement (removeSelfFromTokensDict is requested for them)
    replacement.originalTokens = setAttributeOfTokensInList(
        matchedTokens, 'parent', replacement, removeSelfFromTokensDict = True,
    )

    rebuiltList = list( leadingTokens)
    rebuiltList.append( replacement)
    rebuiltList.extend( trailingTokens)
    container.zubTokenLists.append( rebuiltList)

    return True

#######################################################
def categorySeparatorProc( MO):
    """
    Collapse the matched run of tokens into a single 'categorySeparator' token.

    Sets the global isUSL to True (a category separator was found), then
    replaces the tokens at positions MO.start() .. MO.end() of the working
    token's most recent subtoken list with one new 'categorySeparator' Token,
    appending the resulting list to zubTokenLists.

    Always returns True.
    """

    global workingToken, isUSL
    container = workingToken

    isUSL = True  ## because we found a category separator

    ## split the current subtoken list around the match
    matchStart, matchEnd = MO.start(), MO.end()
    leadingTokens = container.zubTokenLists[ -1][ : matchStart]
    matchedTokens = container.zubTokenLists[ -1][ matchStart : matchEnd]
    trailingTokens = container.zubTokenLists[ -1][ matchEnd : ]

    separator = Token(
        'categorySeparator',
        [],
        text = charStringFromCharTokenList( matchedTokens),
        first = matchedTokens[ 0].first,
        last = matchedTokens[ -1].last,
    )
    ## re-parent the matched tokens to the separator (removeSelfFromTokensDict is requested for them)
    separator.originalTokens = setAttributeOfTokensInList(
        matchedTokens, 'parent', separator, removeSelfFromTokensDict = True,
    )

    rebuiltList = list( leadingTokens)
    rebuiltList.append( separator)
    rebuiltList.extend( trailingTokens)
    container.zubTokenLists.append( rebuiltList)

    return True

#######################################################
def assignTokenType( groupTypeName, subTokenList):
    """
    Build a layer-qualified token type name for a group of subtokens.

    rxTTStr( subTokenList) is tried against allTokenTypesAtLayerRE[ 0],
    allTokenTypesAtLayerRE[ 1], ... for layerCount layers, in order; the
    first layer whose pattern matches wins.

    Returns a ( typeName, layerNumber) pair:
      ( '<groupTypeName>AtL<n>', n)  for the first matching layer n, or
      ( '<groupTypeName>AtLx', None) when no layer matches.
    """
    rxStr = rxTTStr( subTokenList)
    matchingLayers = (
        candidateLayer
        for candidateLayer in range( layerCount)
        if allTokenTypesAtLayerRE[ candidateLayer].match( rxStr)
    )
    layerNumber = next( matchingLayers, None)  ## lowest matching layer, if any
    if layerNumber is None:
        return '%sAtLx' % ( groupTypeName), None
    return '%sAtL%s' % ( groupTypeName, layerNumber), layerNumber

#######################################################
## Matches a (possibly empty) string made up solely of the regexp-able
## characters registered for the 'whitespace', 'comment' and 'instantiator'
## token types.
CIWOnlyRE = re.compile(
    '^[%s%s%s]*$' % tuple(
        tokenTypeNameToRegExpableChar[ ciwTypeName]
        for ciwTypeName in ( 'whitespace', 'comment', 'instantiator')
    )
)
#######################################################
def iemlExpressionProc1( MO):
    """
    Wrap one category (plus any adjacent category separators) in a new
    'categoryExpressionAtLx' token.

    The matched span of the working token's latest subtoken list is divided
    by match-group lengths (used as token counts):
      group 2 -- leading category separator tokens
      group 3 -- the category's own tokens (they become the new token's content)
      group 4 -- trailing category separator tokens
    Tokens covered by group 1 are skipped over: they end up neither in the
    new token nor in the rebuilt subtoken list.

    If any separator is present the global isUSL is set to True.  The new
    subtoken list (tokens before the match, the new token, tokens after the
    match) is appended to zubTokenLists.

    Always returns True.
    """

    global workingToken, isUSL
    container = workingToken

    matchStart, matchEnd = MO.start(), MO.end()
    tokensBefore = container.zubTokenLists[ -1][ : matchStart]
    matchedTokens = container.zubTokenLists[ -1][ matchStart : matchEnd]
    tokensAfter = container.zubTokenLists[ -1][ matchEnd : ]

    ## running offsets into matchedTokens
    leadingSepStart = len( MO.group( 1))
    categoryStart = leadingSepStart + len( MO.group( 2))
    categoryEnd = categoryStart + len( MO.group( 3))
    trailingSepEnd = categoryEnd + len( MO.group( 4))

    leadingSeparators = matchedTokens[ leadingSepStart : categoryStart]
    categoryTokens = matchedTokens[ categoryStart : categoryEnd]
    trailingSeparators = matchedTokens[ categoryEnd : trailingSepEnd]

    ## the extent runs from the first leading separator (or, failing that, the
    ## first category token) to the last trailing separator (or last category token)
    first = leadingSeparators[ 0].first if len( leadingSeparators) > 0 else None
    if first is None:
        first = categoryTokens[ 0].first
    last = categoryTokens[ -1].last
    if len( trailingSeparators) > 0:
        last = trailingSeparators[ -1].last

    if leadingSeparators or trailingSeparators:
        isUSL = True   ## the presence or absence of any category
                       ## separators determines whether the entire
                       ## expression is a USL or (by default) a
                       ## single category expression.

    categoryExpression = Token(
        'categoryExpressionAtLx',
        categoryTokens,
        parent = container,
        leadingCatSep = leadingSeparators,
        leadingCatSepText = charStringFromCharTokenList( leadingSeparators),
        trailingCatSep = trailingSeparators,
        trailingCatSepText = charStringFromCharTokenList( trailingSeparators),
        first = first,
        last = last,
    )

    ## the separator tokens now belong to the new category expression
    for separatorAttr in ( 'leadingCatSep', 'trailingCatSep'):
        setattr(
            categoryExpression,
            separatorAttr,
            setAttributeOfTokensInList(
                getattr( categoryExpression, separatorAttr),
                'parent',
                categoryExpression,
                removeSelfFromTokensDict = True,
            ),
        )

    rebuiltList = list( tokensBefore)
    rebuiltList.append( categoryExpression)
    rebuiltList.extend( tokensAfter)

    container.zubTokenLists.append( rebuiltList)
    return True

#######################################################
def iemlExpressionProc2( MO):
    """
    Change the tokenType of the root token from 'iemlExpression' to either
    'usl' or 'categoryExpressionAtLx'.

    MO is not used.  The function works on the globals workingToken (the
    root token) and isUSL:

      * isUSL true  -- the root token's tokenType is simply switched to the
        regexp-able character registered for 'usl'.
      * isUSL false -- the root token is effectively *replaced* by the first
        token of its latest subtoken list (expected to be its only category
        expression): every substantive attribute of the root is deleted and
        every substantive attribute of that token, except 'parent', is copied
        onto the root.  The root object itself (its identity and class) is
        kept, so workingToken keeps pointing at it.

    In both cases the root is removed from the tokens dict before the change
    and added back afterwards.

    Always returns False ("never do this again").
    """

    global workingToken, isUSL

    ## MethodType is needed below; importing it here means this function does
    ## not depend on 'types' having been imported at module level.
    import types

    def isSubstantiveAttribute( owner, attrName):
        ## neither a dunder name nor a bound method
        if attrName.startswith( '__'):
            return False
        return not isinstance( getattr( owner, attrName), types.MethodType)

    token = workingToken
    token.removeSelfFromTokensDict()

    if isUSL:  ## all we have to do is change the root token's token type
        token.tokenType = tokenTypeNameToRegExpableChar[ 'usl']
    else:
        onlyCategoryExpressionToken = token.zubTokenLists[ -1][ 0]
        onlyCategoryExpressionToken.removeSelfFromTokensDict()

        ## we're keeping the root token's address, '__class__' attribute, etc.,
        ## but we're replacing all of its substantive attributes
        for tokenAttr in dir( token):
            if isSubstantiveAttribute( token, tokenAttr):
                delattr( token, tokenAttr)
        for tokenAttr in dir( onlyCategoryExpressionToken):
            if tokenAttr == 'parent':
                continue  ## root token should not have a 'parent' attribute
            if isSubstantiveAttribute( onlyCategoryExpressionToken, tokenAttr):
                setattr( token, tokenAttr, getattr( onlyCategoryExpressionToken, tokenAttr))

    token.addSelfToTokensDict()
    return False  ## never do this again

#######################################################
def containerProc( MO):

    """
    Process containers, recognizing their parameter identifiers if any.

    The matched span of workingToken's latest subtoken list is divided by
    match-group lengths (used as token counts):
      group 1 -- start delimiter tokens
      group 2 -- the container's contents
      group 3 -- the whole parameter-identifier part (only its length is used)
      group 4 / groups 5, 6 -- two alternative forms of the parameter identifier
      group 7 -- comment/instantiator/whitespace tokens after the parameter identifier
      group 8 -- end delimiter tokens

    The start delimiters decide the kind of container ('undeterminedSubsetOf',
    'diagonal', 'group', otherwise 'categoryExpression'); assignTokenType()
    supplies the layer-qualified type name.  A 'categoryExpression' is handled
    by retyping workingToken itself; any other kind gets a new Token that
    replaces the matched span.

    Returns True when a new token was inserted, False when workingToken was
    retyped in place.
    """

    global workingToken
    token = workingToken

    matchStart, matchEnd = MO.start(), MO.end()
    tokensBefore = token.zubTokenLists[ -1][ : matchStart]
    matchedTokens = token.zubTokenLists[ -1][ matchStart : matchEnd]
    tokensAfter = token.zubTokenLists[ -1][ matchEnd : ]

    contentStart = len( MO.group( 1))
    contentEnd = contentStart + len( MO.group( 2))

    startDelimiters = matchedTokens[ : contentStart]
    contentTokens = matchedTokens[ contentStart : contentEnd]

    ## parameter identifier: the tokens that spell it, and the (possibly larger)
    ## set of tokens that must be taken out of circulation along with it
    identifierTokens = []
    identifierTokensToRemove = []
    try:
        if MO.group( 4):
            identifierTokens = identifierTokensToRemove = matchedTokens[
                contentEnd : contentEnd + len( MO.group( 4))
            ]
    except IndexError:
        pass

    try:
        if MO.group( 5):
            ## here the identifier proper (group 6) starts after a prefix as long as star_parameterIdentifier[ 0]
            identifierStart = contentEnd + len( star_parameterIdentifier[ 0])
            identifierTokens = matchedTokens[
                identifierStart : identifierStart + len( MO.group( 6))
            ]
            identifierTokensToRemove = matchedTokens[
                contentEnd : contentEnd + len( MO.group( 5))
            ]
    except IndexError:
        pass

    trailingCIWStart = contentEnd + len( MO.group( 3))
    trailingCIWEnd = trailingCIWStart + len( MO.group( 7))
    endDelimiterEnd = trailingCIWEnd + len( MO.group( 8))

    trailingCIWTokens = matchedTokens[ trailingCIWStart : trailingCIWEnd]
    endDelimiters = matchedTokens[ trailingCIWEnd : endDelimiterEnd]

    ## the first recognized start delimiter (in this order) names the container
    ## kind; with none of them it is a plain category expression
    delimiterRxStr = rxTTStr( startDelimiters)
    typeName = 'categoryExpression'
    for candidateName, candidateStar in (
        ( 'undeterminedSubsetOf', star_undeterminedSubsetOf),
        ( 'diagonal', star_diagonal),
        ( 'group', star_group),
    ):
        if charRecogStr( candidateStar[ 0]) in delimiterRxStr:
            typeName = candidateName
            break
    tokenTypeName, layerNumber = assignTokenType( typeName, contentTokens)

    first = matchedTokens[ 0].first
    last = matchedTokens[ -1].last

    if typeName != 'categoryExpression':  ## need a new token here.
        instanceToken = Token(
            tokenTypeName,
            contentTokens,
            parent = token,
            first = first,
            last = last,
        )
    else:                                 ## we'll fix this one right in place
        instanceToken = token
        instanceToken.removeSelfFromTokensDict()
        instanceToken.tokenType = tokenTypeNameToRegExpableChar[ tokenTypeName]
        instanceToken.addSelfToTokensDict()

    if layerNumber is not None:
        instanceToken.layerNumber = layerNumber

    def adopt( tokenList):
        ## make instanceToken the parent of these tokens, with removeSelfFromTokensDict requested
        return setAttributeOfTokensInList(
            tokenList,
            'parent',
            instanceToken,
            removeSelfFromTokensDict = True,
        )

    if startDelimiters:
        instanceToken.startDelimiterTokens = adopt( startDelimiters)
    if endDelimiters:
        instanceToken.endDelimiterTokens = adopt( endDelimiters)
    if identifierTokensToRemove:
        instanceToken.parameterIdentifierTokens = adopt( identifierTokensToRemove)
        instanceToken.parameterIdentifier = charStringFromCharTokenList( identifierTokens)
        instanceToken.piFirst = identifierTokens[ 0].first
        instanceToken.piLast = identifierTokens[ -1].last
    if trailingCIWTokens:
        instanceToken.postParameterIdentifierCIW = adopt( trailingCIWTokens)

    rebuiltList = list( tokensBefore)
    if instanceToken != token:  ## i.e., if we're not changing a categoryExpression token in situ
        rebuiltList.append( instanceToken)
        somethingChanged = True
    else:
        ## in situ: only the contents stay in the list.  We must report False
        ## here or we may get into an infinite loop, because nothing may have changed.
        rebuiltList.extend( contentTokens)
        somethingChanged = False
    rebuiltList.extend( tokensAfter)
    token.zubTokenLists.append( rebuiltList)
    return somethingChanged

#######################################################
def L0SymbolProc( MO):
    """
    Replace a matched layer-0 symbol (with its layer mark and surrounding
    CIW tokens) by a single 'genOpAtL0' token.

    The matched span of workingToken's latest subtoken list is divided by
    match-group lengths (used as token counts):
      group 1 -- comment/instantiator/whitespace (CIW) tokens before the symbol
      group 2 -- the symbol's own tokens
      group 3 -- CIW tokens after the symbol
      group 4 -- the layer mark tokens
      group 5 -- CIW tokens after the layer mark

    The new token's primitiveSet is looked up in L0SymbolsToPrimitiveSets using
    the first symbol token's tokenType shifted down by numberOfNonChrTokenTypes.

    Always returns True.
    """

    global workingToken
    token = workingToken

    matchStart, matchEnd = MO.start(), MO.end()
    tokensBefore = token.zubTokenLists[ -1][ : matchStart]
    matchedTokens = token.zubTokenLists[ -1][ matchStart : matchEnd]
    tokensAfter = token.zubTokenLists[ -1][ matchEnd : ]

    ## boundaries[ k] is the offset in matchedTokens at which group k + 1 starts
    boundaries = [ 0]
    for groupNumber in ( 1, 2, 3, 4, 5):
        boundaries.append( boundaries[ -1] + len( MO.group( groupNumber)))

    leadingCIW = matchedTokens[ boundaries[ 0] : boundaries[ 1]]
    symbolTokens = matchedTokens[ boundaries[ 1] : boundaries[ 2]]
    postSymbolCIW = matchedTokens[ boundaries[ 2] : boundaries[ 3]]
    layerMarkTokens = matchedTokens[ boundaries[ 3] : boundaries[ 4]]
    trailingCIW = matchedTokens[ boundaries[ 4] : boundaries[ 5]]

    genOp = Token(
        'genOpAtL0',
        [],
        parent = token,
        primitiveSet = L0SymbolsToPrimitiveSets[ chr( ord( symbolTokens[ 0].tokenType) - numberOfNonChrTokenTypes)],
        symbol = charStringFromCharTokenList( symbolTokens),
        symFirst = symbolTokens[ 0].first,
        symLast = symbolTokens[ -1].last,
        layerNumber = 0,
        layerMark = charStringFromCharTokenList( layerMarkTokens),
        first = matchedTokens[ 0].first,
        last = layerMarkTokens[ -1].last,  ## so that -last- always points at the layermark
    )

    ## optional CIW runs are recorded only when non-empty
    for ciwAttr, ciwTokens in (
        ( 'preGenOpCIW', leadingCIW),
        ( 'postSeme1CIW', postSymbolCIW),
        ( 'postGenOpCIW', trailingCIW),
    ):
        if len( ciwTokens) > 0:
            setattr(
                genOp,
                ciwAttr,
                setAttributeOfTokensInList( ciwTokens, 'parent', genOp, removeSelfFromTokensDict = True),
            )

    genOp.layerMarkTokens = setAttributeOfTokensInList( layerMarkTokens, 'parent', genOp, removeSelfFromTokensDict = True)
    genOp.symbolTokens = setAttributeOfTokensInList( symbolTokens, 'parent', genOp, removeSelfFromTokensDict = True)

    genOp.originalTokens = setAttributeOfTokensInList( matchedTokens, 'parent', genOp, removeSelfFromTokensDict = False)

    rebuiltList = list( tokensBefore)
    rebuiltList.append( genOp)
    rebuiltList.extend( tokensAfter)

    token.zubTokenLists.append( rebuiltList)

    return True

#######################################################
def L1SymbolProc( MO):
    """
    Replace a matched layer-1 symbol (with its layer mark and surrounding
    CIW tokens) by a single 'genOpAtL1' token carrying implicit L0 semes.

    The matched span of workingToken's latest subtoken list is divided by
    match-group lengths (used as token counts):
      group 1 -- comment/instantiator/whitespace (CIW) tokens before the symbol
      group 2 -- the symbol's own tokens
      group 5 -- CIW tokens after the symbol
      group 6 -- the layer mark tokens
      group 7 -- CIW tokens after the layer mark
    (groups 3 and 4 are not used for slicing here.)

    For every primitive set listed for the symbol in L1SymbolsToPrimitiveSets,
    an implicit 'genOpAtL0' token is appended to the new token's semes.

    Always returns True.
    """

    global workingToken
    token = workingToken

    matchStart, matchEnd = MO.start(), MO.end()
    tokensBefore = token.zubTokenLists[ -1][ : matchStart]
    matchedTokens = token.zubTokenLists[ -1][ matchStart : matchEnd]
    tokensAfter = token.zubTokenLists[ -1][ matchEnd : ]

    ## boundaries[ k] is the offset in matchedTokens at which the k-th piece starts
    boundaries = [ 0]
    for groupNumber in ( 1, 2, 5, 6, 7):
        boundaries.append( boundaries[ -1] + len( MO.group( groupNumber)))

    leadingCIW = matchedTokens[ boundaries[ 0] : boundaries[ 1]]
    symbolTokens = matchedTokens[ boundaries[ 1] : boundaries[ 2]]
    postSymbolCIW = matchedTokens[ boundaries[ 2] : boundaries[ 3]]
    layerMarkTokens = matchedTokens[ boundaries[ 3] : boundaries[ 4]]
    trailingCIW = matchedTokens[ boundaries[ 4] : boundaries[ 5]]

    genOp = Token(
        'genOpAtL1',
        [],
        parent = token,
        symbol = charStringFromCharTokenList( symbolTokens),
        symFirst = symbolTokens[ 0].first,
        symLast = symbolTokens[ -1].last,
        layerMark = charStringFromCharTokenList( layerMarkTokens),
        semes = [],
        first = matchedTokens[ 0].first,
        last = layerMarkTokens[ -1].last,  ## so that -last- always points at the layermark
        layerNumber = 1,
    )

    ## optional CIW runs are recorded only when non-empty
    for ciwAttr, ciwTokens in (
        ( 'preGenOpCIW', leadingCIW),
        ( 'postSeme3CIW', postSymbolCIW),
        ( 'postGenOpCIW', trailingCIW),
    ):
        if len( ciwTokens) > 0:
            setattr(
                genOp,
                ciwAttr,
                setAttributeOfTokensInList( ciwTokens, 'parent', genOp, removeSelfFromTokensDict = True),
            )

    genOp.layerMarkTokens = setAttributeOfTokensInList( layerMarkTokens, 'parent', genOp, removeSelfFromTokensDict = True)
    genOp.symbolTokens = setAttributeOfTokensInList( symbolTokens, 'parent', genOp, removeSelfFromTokensDict = True)

    ## note: removeSelfFromTokensDict is left at its default for this call
    genOp.originalTokens = setAttributeOfTokensInList( matchedTokens, 'parent', genOp)

    ## add the L0 semes: one implicit genOpAtL0 per primitive set of this symbol,
    ## each spanning the same characters as the L1 symbol itself
    primitiveSets = L1SymbolsToPrimitiveSets[ charStringFromCharTokenList( symbolTokens)]
    for primitiveFrozenSet in primitiveSets:
        implicitSeme = Token(
            'genOpAtL0',
            [],
            layerMark = star_L0LayerMark,
            symbol = list( primitiveFrozenSet)[ 0],
            primitiveSet = primitiveFrozenSet,
            first = symbolTokens[ 0].first,
            last = symbolTokens[ -1].last,
            layerNumber = 0,
            parent = genOp,
            implicit = True,
        )
        genOp.semes.append( implicitSeme)

    rebuiltList = list( tokensBefore)
    rebuiltList.append( genOp)
    rebuiltList.extend( tokensAfter)

    token.zubTokenLists.append( rebuiltList)

    return True

#######################################################
def genOpProc( MO):
    """
    Combine up to three matched semes and their layer mark into one
    'genOpAtL<n>' token, where n is one more than the layer of the first seme.

    The matched span of workingToken's latest subtoken list is divided by
    match-group lengths (used as token counts; a group that did not take part
    in the match counts as length 0):
      group 1  -- tokens preceding the genOp; these are LEFT WHERE THEY ARE
      group 2  -- immediately-preceding comment/instantiator/whitespace (CIW) tokens
      group 3  -- R1 seme          group 4  -- CIW after seme 1
      group 6  -- R2 seme          group 7  -- CIW after seme 2
      group 9  -- R3 seme          group 10 -- CIW after seme 3
      group 11 -- the layer mark tokens
    (groups 5 and 8 are not used for slicing here.)

    Always returns True.
    """

    global workingToken
    token = workingToken

    matchStart, matchEnd = MO.start(), MO.end()
    tokensBefore = token.zubTokenLists[ -1][ : matchStart]
    matchedTokens = token.zubTokenLists[ -1][ matchStart : matchEnd]
    tokensAfter = token.zubTokenLists[ -1][ matchEnd : ]

    ## groupLengths[ k] is the length of MO.group( k); the leading None is a
    ## placeholder so the indexes line up with the MO.group() arguments.
    groupLengths = [ None]
    for groupText in MO.groups():
        groupLengths.append( 0 if groupText is None else len( groupText))

    ## running offsets of the consecutive pieces inside matchedTokens
    boundaries = [ 0]
    for groupNumber in ( 1, 2, 3, 4, 6, 7, 9, 10, 11):
        boundaries.append( boundaries[ -1] + groupLengths[ groupNumber])
    pieces = [
        matchedTokens[ boundaries[ pieceIndex] : boundaries[ pieceIndex + 1]]
        for pieceIndex in range( 9)
    ]
    (
        untouchedLeadingTokens,  ## we will LEAVE THESE RIGHT WHERE THEY ARE!
        leadingCIW,              ## immediately-preceding CIWs only
        seme1Tokens,
        postSeme1CIW,
        seme2Tokens,
        postSeme2CIW,
        seme3Tokens,
        postSeme3CIW,
        layerMarkTokens,
    ) = pieces

    ## weird but works: the last character of the first seme's tokenTypeName is
    ## its layer digit; the "+ 1" accounts for the fact that the semes are at
    ## L - 1, if the genOp is at L.
    layerNumber = int( seme1Tokens[ 0].tokenTypeName()[ -1]) + 1

    genOp = Token(
        'genOpAtL%d' % ( layerNumber, ),
        [],
        parent = token,
        layerNumber = layerNumber,
        semes = [],
        first = seme1Tokens[ 0].first,
        last = layerMarkTokens[ -1].last,
    )

    ## optional CIW runs are recorded only when non-empty
    for ciwAttr, ciwTokens in (
        ( 'preGenOpCIW', leadingCIW),
        ( 'postSeme1CIW', postSeme1CIW),
        ( 'postSeme2CIW', postSeme2CIW),
        ( 'postSeme3CIW', postSeme3CIW),
    ):
        if len( ciwTokens) > 0:
            setattr(
                genOp,
                ciwAttr,
                setAttributeOfTokensInList( ciwTokens, 'parent', genOp, removeSelfFromTokensDict = True),
            )

    genOp.originalTokens = setAttributeOfTokensInList( matchedTokens, 'parent', genOp, removeSelfFromTokensDict = False)

    ## roll through the semes R1-R3, stopping at the first absent one
    for roleNumber, semeTokens in enumerate( ( seme1Tokens, seme2Tokens, seme3Tokens), 1):
        if len( semeTokens) == 0:
            break
        genOp.semes.extend( setAttributeOfTokensInList( semeTokens, 'parent', genOp, removeSelfFromTokensDict = False))
        genOp.semes[ -1].roleNumber = roleNumber

    genOp.layerMarkTokens = setAttributeOfTokensInList(
        layerMarkTokens,
        'parent',
        genOp,
        removeSelfFromTokensDict = True,
    )
    genOp.layerMark = charStringFromCharTokenList( layerMarkTokens)

    rebuiltList = list( tokensBefore)
    rebuiltList.extend( untouchedLeadingTokens)  ## we are LEAVING THESE WHERE THEY ARE!
    rebuiltList.append( genOp)
    rebuiltList.extend( tokensAfter)

    token.zubTokenLists.append( rebuiltList)

    return True

#######################################################
def nonGenOpProc( MO):
    """
    Replace a matched "operand operator operand" sequence by one operation
    token named '<operationName>AtL<n>'.

    The matched span of workingToken's latest subtoken list is divided by
    match-group lengths (used as token counts):
      group 1 -- comment/instantiator/whitespace (CIW) tokens before the operation
      group 2 -- first operand        group 3 -- CIW before the operator
      group 4 -- operator tokens      group 5 -- CIW after the operator
      group 6 -- second operand       group 7 -- CIW after the operation

    The operation name comes from operatorSymbolToOperationName; the layer
    number is the trailing digit of the first operand's tokenTypeName.  Each
    non-empty CIW run is attached to the new token and also registered via
    addCIWToContainingCategoryExpression().

    Always returns True.
    """

    global workingToken
    token = workingToken

    matchStart, matchEnd = MO.start(), MO.end()
    tokensBefore = token.zubTokenLists[ -1][ : matchStart]
    matchedTokens = token.zubTokenLists[ -1][ matchStart : matchEnd]
    tokensAfter = token.zubTokenLists[ -1][ matchEnd : ]

    ## boundaries[ k] is the offset in matchedTokens at which group k + 1 starts
    boundaries = [ 0]
    for groupNumber in range( 1, 8):
        boundaries.append( boundaries[ -1] + len( MO.group( groupNumber)))
    (
        leadingCIW,
        operand1Tokens,
        preOperatorCIW,
        operatorTokens,
        postOperatorCIW,
        operand2Tokens,
        trailingCIW,
    ) = [
        matchedTokens[ boundaries[ pieceIndex] : boundaries[ pieceIndex + 1]]
        for pieceIndex in range( 7)
    ]

    layerNumber = int( operand1Tokens[ 0].tokenTypeName()[ -1])
    operatorSymbol = charStringFromCharTokenList( operatorTokens)
    operationTypeName = '%sAtL%s' % (
        operatorSymbolToOperationName[ operatorSymbol],
        layerNumber,
    )
    operation = Token(
        operationTypeName,
        [],
        operator = operatorSymbol,
        first = matchedTokens[ 0].first,
        last = matchedTokens[ -1].last,
        layerNumber = layerNumber,
        parent = token,
        opFirst = operatorTokens[ 0].first,
        opLast = operatorTokens[ -1].last,
    )
    ## only the first token of each operand group becomes an operand
    operation.operands = setAttributeOfTokensInList(
        [ operand1Tokens[ 0], operand2Tokens[ 0], ],
        'parent',
        operation,
        removeSelfFromTokensDict = False,
    )

    ## non-empty CIW runs are attached here and also reported to the containing category expression
    for ciwAttr, ciwTokens in (
        ( 'preNonGenOpCIW', leadingCIW),
        ( 'preOperatorCIW', preOperatorCIW),
        ( 'postOperatorCIW', postOperatorCIW),
        ( 'postNonGenOpCIW', trailingCIW),
    ):
        if len( ciwTokens) > 0:
            setattr(
                operation,
                ciwAttr,
                setAttributeOfTokensInList( ciwTokens, 'parent', operation, removeSelfFromTokensDict = True),
            )
            addCIWToContainingCategoryExpression( ciwTokens, token)

    operation.operatorTokens = setAttributeOfTokensInList( operatorTokens, 'parent', operation, removeSelfFromTokensDict = True)

    operation.originalTokens = setAttributeOfTokensInList( matchedTokens, 'parent', operation, removeSelfFromTokensDict = False)

    rebuiltList = list( tokensBefore)
    rebuiltList.append( operation)
    rebuiltList.extend( tokensAfter)

    token.zubTokenLists.append( rebuiltList)

    return True

#######################################################
def addCIWToContainingCategoryExpression( tokenList, token):
    """
    Record tokenList on the category expression that contains token.

    The owner is token.findAncestorOfType( 'categoryExpression'); if there is
    no such ancestor, token itself is used provided its type name starts with
    'categoryExpression'.  tokenList is appended (as one item) to the owner's
    categoryCIWTokenLists attribute, which is created on first use.

    If no owner can be found this is an internal error: a message is issued
    through errMsg(), pdb is entered when tokenTweakDebug is set, and the
    program exits with status 1.
    """
    owner = token.findAncestorOfType( 'categoryExpression')
    if owner is None and token.tokenTypeName().startswith( 'categoryExpression'):
        owner = token  ## the token is itself the category expression

    if owner is None:
        errMsg( 'internal error: can\'t find the ancestor categoryExpression')
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    if not hasattr( owner, 'categoryCIWTokenLists'):
        owner.categoryCIWTokenLists = []
    owner.categoryCIWTokenLists.append( tokenList)

#######################################################
def replaceTokenInParentSubtokenList( token, newToken):
    """
    Put newToken where token currently sits in its parent.

    If token is the global iemlExpressionToken, that global is simply rebound
    to newToken and nothing else happens.

    Otherwise the parent's token-holding attributes are examined in a fixed
    order (the latest list of zubTokenLists, then complexAtL0 .. complexAtL6,
    semes, operands).  In the first one that contains token, every occurrence
    of token is replaced by newToken: for zubTokenLists the new list is
    appended as a new latest list, for the other attributes the attribute is
    overwritten.  Finally newToken.parent is set to the parent.

    If the parent holds token in none of these places this is an internal
    error: a message is issued through errMsg(), pdb is entered when
    tokenTweakDebug is set, and the program exits with status 1.
    """
    global iemlExpressionToken

    if token == iemlExpressionToken:
        iemlExpressionToken = newToken
        return

    parentToken = token.parent
    holderNames = [ 'zubTokenLists']
    holderNames.extend( 'complexAtL%d' % layer for layer in range( 7))
    holderNames.extend( [ 'semes', 'operands'])

    for holderName in holderNames:
        if not hasattr( parentToken, holderName):
            continue
        isZubList = ( holderName == 'zubTokenLists')
        if isZubList:
            currentList = parentToken.zubTokenLists[ -1]
        else:
            currentList = getattr( parentToken, holderName)
        if token not in currentList:
            continue

        swappedList = [ newToken if member == token else member for member in currentList]
        if isZubList:
            parentToken.zubTokenLists.append( swappedList)
        else:
            setattr( parentToken, holderName, swappedList)
        break
    else:
        ## no holder contained the token
        errMsg( 'internal error: parent token did not list token as subtoken.')
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    newToken.parent = parentToken

#######################################################
def promotionProc1( MO):

    """ Give the container (the global workingToken) a definite layer.

    If the container already has a 'layerNumber' attribute, nothing is
    done.  Otherwise one subtoken is picked out of the span of the
    container's newest zubTokenList that MO matched (offset
    len( MO.group( 2)), length len( MO.group( 3)), first element), and
    its layerNumber is copied to the container:

      * the container's token type name, which must end in 'AtLx', has
        the 'x' replaced by that layer number;
      * the container is removed from the tokens dict and copied under
        the new type name; the copy becomes workingToken and receives
        the layerNumber attribute;
      * the copy takes the old container's place in its parent via
        replaceTokenInParentSubtokenList().

    Returns None on every path.
    NOTE(review): the dispatch-table documentation says processing
    functions must return True when they change something -- confirm
    that the caller of this function does not depend on that.
    """

    global workingToken
    container = workingToken

    ## Already promoted on an earlier pass: leave it alone.
    if hasattr( container, 'layerNumber'):
        return

    matchedTokens = container.zubTokenLists[ -1][ MO.start() : MO.end()]
    firstIndex = len( MO.group( 2))
    promotableToken = matchedTokens[ firstIndex : firstIndex + len( MO.group( 3))][ 0]

    ## 'fooAtLx' -> 'foo', then re-suffix with the now-known layer number
    baseTypeName = re.search( '(^.*?)(AtLx$)', container.tokenTypeName()).group(1)
    newTokenTypeName = '%sAtL%s' % ( baseTypeName, promotableToken.layerNumber)

    container.removeSelfFromTokensDict()
    workingToken = container.copy( newTokenTypeName = newTokenTypeName)
    workingToken.layerNumber = promotableToken.layerNumber
    replaceTokenInParentSubtokenList( container, workingToken)
    

#######################################################
def promotionProc2( MO):

    """ This is where the collapsing ("telescoping") of the containers happens.

    workingToken is a container.  The token it contains is "promoted" in
    the hierarchy so that it occupies the same position in the hierarchy
    that the container was in.  The promoted token gets the parent of
    the container, or None if the container has no 'parent' attribute.
    The container is added to the "containedIn" list of the promoted
    token.

    MO is a match against the container's newest zubTokenList:
    group( 1) and group( 3) delimit the tokens before and after the
    promotable one, and group( 2) is the promotable token itself.  Only
    the promotable token is used; the tokens matched by groups 1 and 3
    are not transferred anywhere by this function.

    Besides taking the container's place, the promoted copy inherits
    from the container:
      * its CIW entries (appended to the copy's own CIW list, if the
        container has a non-empty one),
      * its roleNumber, prologueText and epilogueText, when present.

    Both the original promotable token and the container are removed
    from the tokens dict; the copy becomes the new workingToken.

    Always returns True (something was changed).
    """

    global workingToken
    container = workingToken

    matchedTokens = container.zubTokenLists[ -1][ MO.start() : MO.end()]
    promotableStart = len( MO.group( 1))
    promotableTokenList = matchedTokens[
        promotableStart
        :
        promotableStart + len( MO.group( 2))
    ]

    if hasattr( container, 'parent'):
        savedParent = container.parent
    else:
        savedParent = None

    promotableToken = promotableTokenList[ 0]  ## there is always exactly one
    ## NOTE(review): promotionProc1 removes the old token from the tokens
    ## dict *before* copying it; here the copy is made first.  Order kept
    ## as it was -- confirm copy()/removeSelfFromTokensDict() don't care.
    workingToken = promotableToken.copy()
    promotableToken.removeSelfFromTokensDict()
    workingToken.parent = savedParent

    replaceTokenInParentSubtokenList( container, workingToken)

    ## remember which container(s) this token was telescoped out of
    if not hasattr( workingToken, 'containedIn'):
        workingToken.containedIn = []
    workingToken.containedIn.append( container)
    container.removeSelfFromTokensDict()

    if hasattr( container, 'CIW') and len( container.CIW) > 0:
        if not hasattr( workingToken, 'CIW'):
            workingToken.CIW = []
        workingToken.CIW.extend( container.CIW)

    ## attributes of the container that simply carry over when present
    for attributeName in ( 'roleNumber', 'prologueText', 'epilogueText'):
        if hasattr( container, attributeName):
            setattr( workingToken, attributeName, getattr( container, attributeName))

    return True

#######################################################
def uslProc( MO):  ## This is where the categoryExpressions get sorted out into their respective layers

    """ Distribute the matched subtokens of the usl into per-layer lists.

    workingToken is the usl token.  Every token in the span of its
    newest zubTokenList that MO matched is appended to the usl's
    'complexAtL<n>' attribute, where <n> is that token's layerNumber
    (the attribute is created as an empty list on first use).  Each
    token is passed through setAttributeOfTokensInList() with
    'parent' and the usl token before being appended.

    Afterwards an empty list is appended to the usl's zubTokenLists,
    so the newest zubTokenList is empty.

    Always returns False.
    """

    global workingToken
    token = workingToken

    for thisToken in token.zubTokenLists[ -1][ MO.start() : MO.end()]:
        layerListAttributeName = 'complexAtL%d' % ( thisToken.layerNumber)
        if not hasattr( token, layerListAttributeName):
            setattr( token, layerListAttributeName, [])
        getattr( token, layerListAttributeName).extend(
            setAttributeOfTokensInList(
                [ thisToken],
                'parent',
                token,
            ),
        )
    token.zubTokenLists.append( [])

    return False  ## never need to do this again

#######################################################

## ---------------------------------------------------------------------
## Building blocks shared by the ttDispatchList entries below.
##
## Everything here only assembles strings and dicts out of
## tokenTypeNameToRegExpableChar and charRecogStr(), exactly as the
## entries used to do inline.  (Assumes charRecogStr() is a pure
## function of its argument, since the helpers call it a different
## number of times than the inline code did.)
## ---------------------------------------------------------------------

## Character-class body: comment, instantiator, whitespace token types.
_ttCIWChars = ''.join( [
    tokenTypeNameToRegExpableChar[ 'comment'],
    tokenTypeNameToRegExpableChar[ 'instantiator'],
    tokenTypeNameToRegExpableChar[ 'whitespace'],
])

## Same three token types, in the order the container patterns spell them.
_ttWCIChars = ''.join( [
    tokenTypeNameToRegExpableChar[ 'whitespace'],
    tokenTypeNameToRegExpableChar[ 'comment'],
    tokenTypeNameToRegExpableChar[ 'instantiator'],
])

_ttWhitespaceChar = tokenTypeNameToRegExpableChar[ 'whitespace']

## Ends of the range genOpAtL0 .. categoryExpressionAtL<layerCount - 1>.
_ttFirstKnownLayerChar = tokenTypeNameToRegExpableChar[ 'genOpAtL0']
_ttLastKnownLayerChar = tokenTypeNameToRegExpableChar[ 'categoryExpressionAtL%d' % ( layerCount - 1)]

## 'A-Za-z0-9.:_-'  ## legal chars in delimited (doublequoted) parameter identifiers
_ttQuotedParameterIdentifierChars = ''.join( [
    charRecogStr( 'A'),
    '-',
    charRecogStr( 'Z'),

    charRecogStr( 'a'),
    '-',
    charRecogStr( 'z'),

    charRecogStr( '0'),
    '-',
    charRecogStr( '9'),

    charRecogStr( '.'),
    charRecogStr( ':'),
    charRecogStr( '_'),
    charRecogStr( '-'),
])


def _ttTokenTypeDict( *tokenTypeNames):
    """ Return a new dict keyed by the regexpable char of each named token type (values are None). """
    return { tokenTypeNameToRegExpableChar[ name]: None for name in tokenTypeNames}


def _ttCharClass( *chars):
    """ Return charRecogStr() of every argument, concatenated in order, for use inside [...]. """
    return ''.join( [ charRecogStr( char) for char in chars])


def _ttUnknownLayerTokenTypes():
    """ Return a new dict of the token types from 'primitiveUnionAtLx' to 'categoryExpressionAtLx'.

    This range includes everything that ends in "Lx" (things whose
    layernumbers are still unknown).
    """
    return dictOfTokenTypesFromStartingAndEndingTokenTypeNames(
        (
            (
                'primitiveUnionAtLx',
                'categoryExpressionAtLx',
            ),
        ),
    )


def _ttDelimitedSpanPattern( delimiterPair):
    """ Return the pattern for: start delimiter, shortest possible content, end delimiter. """
    return '(%s)(.*?)(%s)' % (
        charRecogStr( delimiterPair[ 0]),
        charRecogStr( delimiterPair[ 1]),
    )


def _ttContainerPattern( startDelimiter, excludedChars, endDelimiter):
    """ Return the pattern that recognizes one container.

    startDelimiter and endDelimiter are inserted into the pattern as
    they are.  excludedChars is the body of a character class listing
    what may not appear in the container's content (group 2), in
    addition to the digits 0-9, which are always excluded because
    parameter identifiers may appear only at the end.
    """
##                                 --6--             ## parameter identifier is either group( 4) or group( 6)
##                   ---4----   -----5------
## -1  --2-------    -----------3-------------  --7--  -8
    return '(%s)([^%s%s-%s]*?)(([%s-%s]*)|(%s([%s]*)%s))?([%s]*)(%s)' % (

        startDelimiter,

        excludedChars,
        charRecogStr( '0'), charRecogStr( '9'),  ## no parameter identifiers until the end
                 ## NOTE: comments, instantiators, and whitespaces may appear here and they should be left where they are

        charRecogStr( '0'), charRecogStr( '9'),  ## range of chars in undelimited parameter identifiers.

        charRecogStr( star_parameterIdentifier[ 0]),
        _ttQuotedParameterIdentifierChars,
        charRecogStr( star_parameterIdentifier[ 1]),

        _ttWCIChars,  ## These should be left where they are for processing elsewhere, but we need to preserve them if they exist.

        endDelimiter,
    )


def _ttDelimitedContainerEntry( delimiterPair):
    """ Return the dispatch tuple for a container bracketed by delimiterPair[ 0] and delimiterPair[ 1].

    The container's own start and end delimiters are excluded from its
    content (no nesting of the same kind inside one match), as is the
    parameter-identifier start delimiter.
    """
    return (
        _ttUnknownLayerTokenTypes(),
        _ttContainerPattern(
            charRecogStr( delimiterPair[ 0]),  ## start delimiter
            ''.join( [
                charRecogStr( delimiterPair[ 0]),  ## no starts of this container kind inside, please
                charRecogStr( delimiterPair[ 1]),  ## no ends of this container kind inside, please
                charRecogStr( star_parameterIdentifier[ 0]), ## no parameter identifiers until the end
            ]),
            charRecogStr( delimiterPair[ 1]),  ## end delimiter
        ),
        containerProc,
    )


## ---------------------------------------------------------------------
## ttDispatchList: a list of lists of dispatch tuples.
##
## Documentation for the tuples:
## tuple[0] = a list of the token types to be processed (actually a dict)
## tuple[1] = regexp for recognizing subnode pattern.  Must be satisfied by the types of token's subtokens.
## tuple[2] = name of processing function to call when both of the above conditions are satisfied.
##            NOTE: All processing functions MUST return
##            True if they change anything, False if not.
## ---------------------------------------------------------------------

ttDispatchList = [

    ## ttDispatchList[ 0]: handle outer delimiters, if any
    [
        (
            _ttTokenTypeDict( 'iemlExpression'),

            '^[%s-%s]+$' % (
                chr( ordinalOfLeastChar),
                chr( MAXUNICODEVALUE),
            ),

            outerDelimsProc,
        )
    ],


    ## ttDispatchList[ 1]: things recognized only at the beginning of parsing: comments, instantiators, whitespace
    [
        (
            _ttTokenTypeDict( 'iemlExpression'),
            _ttDelimitedSpanPattern( star_comment),
            commentProc,
        ),

        (
            _ttTokenTypeDict( 'iemlExpression'),
            _ttDelimitedSpanPattern( star_instantiator),
            instantiatorProc,
        ),

        (
            _ttTokenTypeDict( 'iemlExpression'),

            '[%s]+' % (
                charRecogStr( '\011\012\015 '),
            ),

            whitespaceProc,
        ),

        (
            _ttTokenTypeDict( 'iemlExpression'),

            '[%s]' % (
                charRecogStr( star_categorysep),
            ),

            categorySeparatorProc,
        ),

        (
            _ttTokenTypeDict( 'iemlExpression', 'usl'),

##            ------?-------  -1-  --2---  -3-
            '((?<=^)|(?<=%s))(%s?)([^%s]+)(%s?)' % (

                tokenTypeNameToRegExpableChar[ 'categoryExpressionAtLx'],  ## since this function runs repeatedly,
                                                                           ## there may already be a leading categoryExpressionAtLx

                tokenTypeNameToRegExpableChar[ 'categorySeparator'],

                ''.join( [
                    tokenTypeNameToRegExpableChar[ 'categorySeparator'],
                    tokenTypeNameToRegExpableChar[ 'categoryExpressionAtLx'],
                ]),

                tokenTypeNameToRegExpableChar[ 'categorySeparator'],

            ),

            iemlExpressionProc1,  ## result is a categoryExpression or the addition of tokens to the uslCIW attribute
                                  ## of the usl token, for each partition delimited by star_categorysep

        ),

        ## At this point, there should only be categoryExpressionAtLx tokens as the subtokens of the ieml expression

        (
            _ttTokenTypeDict( 'iemlExpression', 'usl'),

            '(^)([%s]+)($)' % (
                tokenTypeNameToRegExpableChar[ 'categoryExpressionAtLx'],
            ),

            iemlExpressionProc2,  ## result is a single token which is
                                  ## either a 'categoryExpressionAtLx'
                                  ## (i.e., it's left as it is) or a
                                  ## 'usl' that contains at least one
                                  ## 'categoryExpressionAtLx'.
        ),

        ## After this point, there is no iemlExpression token anywhere.
        ## The original one -- the root token -- became a 'usl'
        ## or a 'categoryExpressionAtLx' in iemlExpressionProc2.

    ],

    [   ## ttDispatchList[ 2] begins here.  Parses all containers: groups, diagonals, and undeterminedSubsetOfs. These need to be done repeatedly until nothing changes.

        (
            _ttTokenTypeDict( 'categoryExpressionAtLx'),

            _ttContainerPattern(
                '^',     ## start delimiter (i.e., we have to start at the beginning)
                charRecogStr( star_parameterIdentifier[ 0]), ## no parameter identifiers until the end
                '$',     ## end delimiter (i.e., we have to end at the end)
            ),

            containerProc,
        ),  ## here ends recognition of category expressions

        _ttDelimitedContainerEntry( star_undeterminedSubsetOf),

        _ttDelimitedContainerEntry( star_diagonal),

        _ttDelimitedContainerEntry( star_group),

    ],

    [  ## ttDispatchList[ 3] begins here

        (
            _ttTokenTypeDict(
                'usl',
                'categoryExpressionAtLx',
                'undeterminedSubsetOfAtLx',
                'diagonalAtLx',
                'groupAtLx',
            ),

##            --1--  --2-  --3--  -4--  --5--
            '([%s]*)([%s])([%s]*)([%s])([%s]*)' % (

                _ttCIWChars,

                _ttCharClass(
                    star_I,
                    star_F,
                    star_E,
                    star_M,
                    star_O,
                    star_U,
                    star_A,
                    star_S,
                    star_B,
                    star_T,
                ),

                _ttCIWChars,

                _ttCharClass(
                    star_fillWithCompletenessLayerMark,
                    star_fillWithPrecedingSemeLayerMark,
                    star_L0LayerMark,
                ),

                _ttCIWChars,
            ),

            L0SymbolProc,    ## creates genOpAtL0 token
        ),

        (
            _ttTokenTypeDict(
                'usl',
                'categoryExpressionAtLx',
                'undeterminedSubsetOfAtLx',
                'diagonalAtLx',
                'groupAtLx',
            ),

##                    --3---   -4--                             ## whitespace: 1, 5, 7
##            -1---  ------2-------   --5--  -6--  -7--         ## symbol: 2    layermark 6
            '([%s]*)((%s[%s])|([%s]))([%s]*)([%s])([%s]*)' % (

                _ttCIWChars,

                charRecogStr( star_wo[0]), ## just the w

                _ttCharClass(
                    star_wo[ 1],
                    star_wa[ 1],
                    star_wu[ 1],
                    star_we[ 1],
                ),

                _ttCharClass(
                    star_y,
                    star_o,
                    star_e,
                    star_u,
                    star_a,
                    star_i,
                    star_j,
                    star_g,
                    star_h,
                    star_c,
                    star_p,
                    star_x,
                    star_s,
                    star_b,
                    star_t,
                    star_k,
                    star_m,
                    star_n,
                    star_d,
                    star_f,
                    star_l,
                ),

                _ttCIWChars,

                _ttCharClass(
                    star_fillWithCompletenessLayerMark,
                    star_fillWithPrecedingSemeLayerMark,
                    star_L1LayerMark,
                ),

                _ttCIWChars,

            ),

            L1SymbolProc,    ## creates genOpAtL1 token
        ),

    ],  ## end of ttDispatchList[ 3]

    [],  ## ttDispatchList[ 4]:  this list will be populated below (see "## populate ttDispatchList[ 4]" in several places)
         ##  These need to be done repeatedly until nothing changes.  Handles genOps and nonGenOps, containerProc2 and promotionProc1 (but not promotionProc2)

    [   ## ttDispatchList[ 5]: uslProc().  This is where the categoryExpressions get sorted out into their respective layers
        (
            _ttTokenTypeDict( 'usl'),

            '^[%s-%s]+$' % (
                _ttFirstKnownLayerChar,
                _ttLastKnownLayerChar,
            ),

            uslProc,
        ),
    ],

    [   ## ttDispatchList[ 6]: promotionProc2.  This is where containers get telescoped and sort of disappear.
        (
            ## all kinds of groups, undeterminedSubsetOfs, and diagonals

            dictOfTokenTypesFromStartingAndEndingTokenTypeNames(
                (
                    (
                        'groupAtLx',
                        'categoryExpressionAtLx',
                    ),
                    (
                        'groupAtL0',
                        'categoryExpressionAtL%d' % ( layerCount - 1),
                    ),
                ),
            ),

            '(%s*)([%s-%s])(%s*)' % (
                _ttWhitespaceChar,

                tokenTypeNameToRegExpableChar[ 'genOpAtL0'],                  ## This range includes everything that
                tokenTypeNameToRegExpableChar[ 'sequenceIntersectionAtL%d' % ( layerCount - 1)],   ## knows its layer and is not group-like.

                _ttWhitespaceChar,
            ),

            promotionProc2,
        ),
    ],

    [   ## ttDispatchList[ 7]: helpful error detectors
        (
            _ttUnknownLayerTokenTypes(),

            '([%s-%s])(%s*)([%s])(%s*)([%s-%s%s-%s])' % (
                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttWhitespaceChar,

                    _ttCharClass(
                        star_primitive_union,
                        star_primitive_intersection,
                        star_primitive_difference,
                        star_sequence_union,
                        star_sequence_intersection,
                        star_sequence_difference,
                    ),

                    _ttWhitespaceChar,

                    tokenTypeNameToRegExpableChar[ 'genOpAtL0'],
                    tokenTypeNameToRegExpableChar[ 'diagonalAtL%d' % ( layerCount - 1)],
                    tokenTypeNameToRegExpableChar[ 'categoryExpressionAtL0'],
                    tokenTypeNameToRegExpableChar[ 'categoryExpressionAtL%d' % ( layerCount - 1)],

            ),

            nonGenOpError,
        ),

        (
            _ttUnknownLayerTokenTypes(),

##                                                       8-----  --9--                  semes are 2, 5, 8
##                                     5------  --6--   -7-------------                CIW are 1, 3, 6, 9, 11
##             --1--  -2-----  --3--  -----------4-----------------------   -10-  -11-  layermark is 10
            '^([%s]*)([%s-%s])([%s]*)(([%s-%s])([%s]*)(([%s-%s])([%s]*))?)?([%s])([%s]*)' % (

                    _ttCIWChars,

                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttCIWChars,

                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttCIWChars,

                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttCIWChars,

                    _ttCharClass(
                        star_fillWithCompletenessLayerMark,
                        star_fillWithPrecedingSemeLayerMark,
                        star_L0LayerMark,
                        star_L1LayerMark,
                        star_L2LayerMark,
                        star_L3LayerMark,
                        star_L4LayerMark,
                        star_L5LayerMark,
                        star_L6LayerMark,
                    ),

                    _ttCIWChars,
            ),

            genOpError,
        ),

        (
            _ttUnknownLayerTokenTypes(),

##             --1--  -----2--------
            '^([%s]*)([%s-%s][%s]*)+$' % (

                    _ttCIWChars,

                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttCIWChars,

            ),

            anomalousError,
        ),

        (
            dictOfTokenTypesFromStartingAndEndingTokenTypeNames(
                (                                         ## all containers with known layernumbers
                    (
                        'groupAtL0',
                        'categoryExpressionAtL%d' % ( layerCount - 1),
                    ),
                ),
            ),

##                              --4--  -5-----
##             --1--  --2----  -------3--------   --6--
            '^([%s]*)([%s-%s])(([%s]*)([%s-%s]))+([%s]*)$' % (  ## more than one operand or seme, but no layermark or operator

                    _ttCIWChars,

                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttCIWChars,

                    _ttFirstKnownLayerChar,
                    _ttLastKnownLayerChar,

                    _ttCIWChars,

            ),

            anomalousError,
        ),
    ],    ## end of ttDispatchList[ 7]: helpful error detectors
]

## populate ttDispatchList[ 4]
## NOTE(review): globalsDict/localsDict were only needed by the exec() call that
## used to build these entries.  They are still assigned here in case code
## outside this section reads them -- remove once confirmed unused.
globalsDict = globals()
localsDict = locals()

#######################################################
def _nonGenOpDispatchEntry( layerNumber):
    ## Build one ttDispatchList[ 4] entry, i.e. a tuple of
    ##     ( tokenTypeDict, regExpString, handler)
    ## whose regexp matches
    ##     CIW* operand CIW* operator CIW* operand CIW*
    ## where both operands are single tokens at layer "layerNumber", the
    ## operator is one of the six star_primitive_*/star_sequence_* operator
    ## characters, and CIW means comment, instantiator or whitespace tokens.
    ## The handler is nonGenOpProc.
    ##
    ## This is built directly rather than by exec()ing a string template;
    ## the resulting pattern string is the same.
    tokenTypeDict = dictOfTokenTypesFromStartingAndEndingTokenTypeNames(
        (
            (
                'primitiveUnionAtLx',
                'categoryExpressionAtLx',
            ),
        ),
    )

    ciwChars = ''.join( [
        tokenTypeNameToRegExpableChar[ 'comment'],
        tokenTypeNameToRegExpableChar[ 'instantiator'],
        tokenTypeNameToRegExpableChar[ 'whitespace'],
    ])

    ## every kind of token that can be an operand at this layer
    operandChars = ''.join( [
        tokenTypeNameToRegExpableChar[ '%sAtL%s' % ( genreName, layerNumber)]
        for genreName in (
            'genOp',
            'primitiveUnion',
            'primitiveDifference',
            'primitiveIntersection',
            'sequenceUnion',
            'sequenceDifference',
            'sequenceIntersection',
            'categoryExpression',
            'group',
            'undeterminedSubsetOf',
            'diagonal',
        )
    ])

    operatorChars = ''.join( [
        charRecogStr( star_primitive_union),
        charRecogStr( star_primitive_intersection),
        charRecogStr( star_primitive_difference),
        charRecogStr( star_sequence_union),
        charRecogStr( star_sequence_intersection),
        charRecogStr( star_sequence_difference),
    ])

    return (
        tokenTypeDict,

##        --1--  --2-  --3-   -4--  --5--  -6--  --7--
        '([%s]*)([%s])([%s]*)([%s])([%s]*)([%s])([%s]*)' % (
            ciwChars,
            operandChars,
            ciwChars,
            operatorChars,
            ciwChars,
            operandChars,
            ciwChars,
        ),

        nonGenOpProc,
    )
#######################################################

for layerNumber in range( layerCount):
    ttDispatchList[ 4].append( _nonGenOpDispatchEntry( layerNumber))

## #######################################################
## def allExcept( genreName, layerNumber):
##     ## return a string of tokenType chars for all the tokenTypes of
##     ## a specific genre (e.g. 'genOp', 'primitiveUnion') EXCEPT for
##     ## the one at layer "layerNumber".
##     returnList = []
##     for j in range( layerCount):
##         if j == layerNumber: continue  ## don't want this one
##         returnList.append(
##             tokenTypeNameToRegExpableChar[
##                 '%sAtL%d' % ( genreName, j)
##             ]
##         )
##     return ''.join( returnList)
#######################################################
def allHigherThanLayerNumber( genreName, layerNumber):
    ## Return one string made of the regexpable tokenType chars of a single
    ## genre (e.g. 'genOp', 'primitiveUnion') for every layer strictly HIGHER
    ## than "layerNumber", in ascending layer order, up to layerCount - 1.
    ## The result is '' when there is no higher layer.
    higherTokenTypeNames = [
        '%sAtL%d' % ( genreName, higherLayerNumber)
        for higherLayerNumber in range( layerNumber + 1, layerCount)
    ]
    return ''.join( [
        tokenTypeNameToRegExpableChar[ tokenTypeName]
        for tokenTypeName in higherTokenTypeNames
    ])
#######################################################

## populate ttDispatchList[ 4], continued
## NOTE(review): globalsDict/localsDict were only needed by the exec() call that
## used to build these entries.  They are still assigned here in case code
## outside this section reads them -- remove once confirmed unused.
globalsDict = globals()
localsDict = locals()

#######################################################
def _genOpDispatchEntry( layerNumber):
    ## Build one ttDispatchList[ 4] entry, i.e. a tuple of
    ##     ( tokenTypeDict, regExpString, handler)
    ## whose regexp matches one, two or three semes at layer "layerNumber"
    ## followed by a layermark (the fill-with-completeness mark, the
    ## fill-with-preceding-seme mark, or the mark of layer layerNumber + 1).
    ## The handler is genOpProc.
    ##
    ## This is built directly rather than by exec()ing a string template;
    ## the resulting pattern string is the same.
    tokenTypeDict = dictOfTokenTypesFromStartingAndEndingTokenTypeNames(
        (
            (
                'primitiveUnionAtLx',
                'categoryExpressionAtLx',
            ),
        ),
    )

    operatorCharList = [
        charRecogStr( star_primitive_union),
        charRecogStr( star_primitive_intersection),
        charRecogStr( star_primitive_difference),
        charRecogStr( star_sequence_union),
        charRecogStr( star_sequence_intersection),
        charRecogStr( star_sequence_difference),
    ]

    ciwChars = ''.join( [
        tokenTypeNameToRegExpableChar[ 'comment'],
        tokenTypeNameToRegExpableChar[ 'instantiator'],
        tokenTypeNameToRegExpableChar[ 'whitespace'],
    ])

    ## CIW plus every token of a layer above this one (note: the
    ## categoryExpression genre is not part of this list)
    ciwAndHigherLayerChars = ciwChars + ''.join( [
        allHigherThanLayerNumber( genreName, layerNumber)
        for genreName in (
            'genOp',
            'primitiveUnion',
            'primitiveDifference',
            'primitiveIntersection',
            'sequenceUnion',
            'sequenceDifference',
            'sequenceIntersection',
            'group',
            'undeterminedSubsetOf',
            'diagonal',
        )
    ])

    ## every kind of token that can be a seme at this layer
    semeChars = ''.join( [
        tokenTypeNameToRegExpableChar[ '%sAtL%s' % ( genreName, layerNumber)]
        for genreName in (
            'genOp',
            'primitiveUnion',
            'primitiveDifference',
            'primitiveIntersection',
            'sequenceUnion',
            'sequenceDifference',
            'sequenceIntersection',
            'categoryExpression',
            'group',
            'undeterminedSubsetOf',
            'diagonal',
        )
    ])

    layerMarkChars = ''.join( [
        charRecogStr( star_fillWithCompletenessLayerMark),
        charRecogStr( star_fillWithPrecedingSemeLayerMark),
        ## the module-level name star_L<layerNumber + 1>LayerMark
        charRecogStr( globals()[ 'star_L%sLayerMark' % ( layerNumber + 1)]),
    ])

    return (
        tokenTypeDict,

##                                                                                                1 is LEFT WHERE IT IS!
##                                                                         --9-  --10-            semes are 3, 6, 9
##                                                           -6--  --7--  -----8------            CIWs are 2, 4, 7, 10
##        --------1-------------------  --2--  -3--  --4--  ------------5----------------   -11-  layermark is 11
        '([%s%s%s%s%s%s][%s]*?|^[%s]*?)([%s]*)([%s])([%s]*)(([%s])([%s]*)(([%s])([%s]*))?)?([%s])' % (
            tuple( operatorCharList) + (
                ciwAndHigherLayerChars,
                ciwAndHigherLayerChars,
                ciwChars,
                semeChars,
                ciwChars,
                semeChars,
                ciwChars,
                semeChars,
                ciwChars,
                layerMarkChars,
            )
        ),

        genOpProc,
    )
#######################################################

for layerNumber in range( layerCount - 1):  ## -1 because the final layermark must be for the layer above the layer of the three operands
    ttDispatchList[ 4].append( _genOpDispatchEntry( layerNumber))
## populate ttDispatchList[ 4], continued
## NOTE(review): globalsDict/localsDict were only needed by the exec() call that
## used to build these entries.  They are still assigned here in case code
## outside this section reads them -- remove once confirmed unused.
globalsDict = globals()
localsDict = locals()

#######################################################
def _promotionDispatchEntry( layerNumber):
    ## Build one ttDispatchList[ 4] entry, i.e. a tuple of
    ##     ( tokenTypeDict, regExpString, handler)
    ## whose regexp matches a token string consisting of exactly one token
    ## at layer "layerNumber", optionally preceded and followed by
    ## whitespace, comment and instantiator tokens.  The handler is
    ## promotionProc1.
    ##
    ## This is built directly rather than by exec()ing a string template;
    ## the resulting pattern string is the same.
    tokenTypeDict = dictOfTokenTypesFromStartingAndEndingTokenTypeNames(
        (
            (
                'primitiveUnionAtLx',
                'categoryExpressionAtLx',
            ),
        ),
    )

    ## irrelevant to layer determination
    ciwChars = ''.join( [
        tokenTypeNameToRegExpableChar[ 'whitespace'],
        tokenTypeNameToRegExpableChar[ 'comment'],
        tokenTypeNameToRegExpableChar[ 'instantiator'],
    ])

    tokenAtLayerChars = ''.join( [
        tokenTypeNameToRegExpableChar[ '%sAtL%s' % ( genreName, layerNumber)]
        for genreName in (
            'primitiveUnion',
            'primitiveDifference',
            'primitiveIntersection',
            'sequenceUnion',
            'sequenceDifference',
            'sequenceIntersection',
            'group',
            'undeterminedSubsetOf',
            'diagonal',
            'categoryExpression',
            'genOp',
        )
    ])

    return (
        tokenTypeDict,

##          --2-   -3-   --4-
##         ----------1---------
        '^(([%s]*)([%s])([%s]*))$' % (
            ciwChars,
            tokenAtLayerChars,
            ciwChars,
        ),

        promotionProc1,
    )
#######################################################

for layerNumber in range( layerCount):  ## [ 0, 1, 2, 3, 4, 5, 6]:
    ttDispatchList[ 4].append( _promotionDispatchEntry( layerNumber))

## Replace every ( tokenTypeDict, regExpString, handler) entry of every
## ttDispatchList sub-list with ( tokenTypeDict, compiledRegExp, handler).
## All patterns are compiled with re.DOTALL.  If building an entry raises,
## the debugger is entered; on continuing, that entry is left out.
ctr = 0
while ctr < len( ttDispatchList):
    newTTDispatch = []
    for item in ttDispatchList[ ctr]:
        try:
            newTTDispatch.append(
                ( item[ 0], re.compile( item[ 1], re.DOTALL), item[ 2])
            )
        except:
            import pdb
            pdb.set_trace()
    ttDispatchList[ ctr] = newTTDispatch
    ctr += 1

#######################################################
def prettyPrintAndOrValidateXml( xmlString):
    """
    Run xmlString through the external helper script "starprettyvalid.py",
    which is looked for in the directory part of sys.argv[ 0] and is run
    with the "python2.6" interpreter.

    The XML is written to the helper's stdin, encoded as unicode_escape.
    The helper is asked to validate when the global validateXml is true,
    and is given the "-prettyXml -" option when the global prettyXml is true.

    Returns the helper's stdout, decoded from unicode_escape, when prettyXml
    is true; otherwise returns None.

    Exits the program with status 1 when the helper cannot be started or
    talked to (OSError), or when it finishes with a nonzero return code.
    """
    global validateXml, showHelperCommand

    commandStringList = [
        'python2.6',
        os.path.join(
            os.path.split( sys.argv[ 0])[ 0],
            'starprettyvalid.py',
        ),
        '-i',
        '-',
        '-inputEncoding',
        'unicode_escape',
    ]
    if validateXml:
        commandStringList.extend( [
            '-validateXml',
            'True',
        ])
    if prettyXml:
        commandStringList.extend( [
            '-prettyXml',
            '-',
        ])

    if showHelperCommand:
        writeOutput( internalErrorsStreamFO, 'Helper command: %s\n' % ( ' '.join( commandStringList)), '-errorsStream')

    try:
        ## Popen() is inside the try block on purpose: a missing interpreter
        ## or helper script makes Popen() itself raise OSError, and that case
        ## must be reported the same way as a failure in communicate().
        subProc = subprocess.Popen(
            commandStringList,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = subProc.communicate( xmlString.encode( 'unicode_escape'))
    except OSError as e:
        errMsg( 'internal error: %s\ncommand was:\n%s\nxmlAsParsedStream was %s' % (
            e,
            ' '.join( commandStringList),
            xmlAsParsedStream,
        ))
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    if subProc.returncode != 0:
        ## The helper's stderr mentions line numbers; tell the user which
        ## file (if any) those line numbers refer to.
        if xmlAsParsedStream not in [ None, '-', '--', '.']:
            lineNumbersWarning = '\n(Line numbers mentioned below are accurate for the file "%s".)\n' % ( os.path.abspath( xmlAsParsedStream))
        else:
            lineNumbersWarning = '\n(If you use the -xmlAsParsed option, you will have a file\nfor which the line numbers mentioned below are accurate.)\n'
        writeOutput(
            internalErrorsStreamFO,
            'There are errors in the Star-XML output.  Please report this defect in %s.\n%s%s\n' % (
                os.path.split( sys.argv[ 0])[ 1],
                lineNumbersWarning,
                stderr.decode( 'unicode_escape'),
            ),
            '-internalErrors',
        )
        if tokenTweakDebug:
            import pdb
            pdb.set_trace()
        sys.exit( 1)

    if prettyXml:
        return stdout.decode( 'unicode_escape')
    else:
        return None

#######################################################
def xmlPrologueStr( note = '', **kwargs):
    """
    Return the text that goes at the top of an XML document:

      * the XML declaration, with an encoding pseudo-attribute only when an
        "encoding" keyword argument is given and is not None;
      * the "note" text followed by a newline, unless note is '';
      * a DOCTYPE declaration with an internal subset holding
        dtdStringAfterParameterEntityExpansion, when the global includeDtd
        is true or an "includeDtd" keyword argument is present.

    NOTE(review): only the presence of the "includeDtd" keyword is tested,
    so includeDtd=False also includes the DTD -- confirm this is intended.
    """
    global dtdStringAfterParameterEntityExpansion, includeDtd

    encoding = kwargs.get( 'encoding')
    encodingStr = '' if encoding is None else ' encoding="%s"' % ( encoding)

    prologue = '<?xml version="1.0"%s?>\n' % ( encodingStr)

    if note != '':
        prologue += '%s\n' % ( note)

    if includeDtd or ( 'includeDtd' in kwargs):
        prologue += '<!DOCTYPE ieml [\n%s\n]>\n\n' % ( dtdStringAfterParameterEntityExpansion)

    return prologue

#######################################################
def xmlifyAttValue( attValue, **kwargs):
    """
    Return attValue escaped for use inside a double-quoted XML attribute.

    An int is first converted to its string form, and None becomes the text
    '\\nNone\\n'.  The characters & < > " are always replaced by
    &amp; &lt; &gt; &#34;.  When newline protection is on, CR and LF are
    also replaced by the character references &#xd; and &#xa;.

    Newline protection defaults to the global prettyXml and can be
    overridden with the "protectNewlines" keyword argument.
    """
    if isinstance( attValue, int):
        attValue = '%s' % ( attValue)
    elif attValue is None:
        attValue = '\nNone\n'  ## sequence operations not yet supported

    ## if we're pretty-printing the XML, then we must protect the newlines
    ## or they will be replaced by spaces by the XML parser. (:^(
    protectNewlines = kwargs.get( 'protectNewlines', prettyXml)

    ## '&' must be handled first so the references added later stay intact
    substitutions = [
        ( '&', '&amp;'),
        ( '<', '&lt;'),
        ( '>', '&gt;'),
        ( '"', '&#34;'),
    ]
    if protectNewlines:
        substitutions.extend( [
            ( '\x0d\x0a', '&#xd;&#xa;'),
            ( '\x0a', '&#xa;'),
            ( '\x0d', '&#xd;'),
        ])

    for rawText, escapedText in substitutions:
        attValue = attValue.replace( rawText, escapedText)
    return attValue

#######################################################
def xmlifyContent( content):
    """
    Return content escaped for use as XML character data: & < > become
    &amp; &lt; &gt;, and every line ending (CR LF, lone LF or lone CR)
    becomes the pair of character references &#xd;&#xa;.
    """
    ## '&' must be handled first so the references added later stay intact
    for rawText, escapedText in (
        ( '&', '&amp;'),
        ( '<', '&lt;'),
        ( '>', '&gt;'),
        ( '\x0d\x0a', '&#xd;&#xa;'),
        ( '\x0a', '&#xd;&#xa;'),
        ( '\x0d', '&#xd;&#xa;'),
    ):
        content = content.replace( rawText, escapedText)
    return content

#######################################################
## Splits a token type name into the part before the layer suffix (group 1)
## and the "AtL<digit>" layer suffix itself (group 2).
## NOTE(review): the [0-%d] character class matches a single digit only, so
## this presumably relies on layerCount being at most 10 -- confirm.
tokenTypeNameRE = re.compile( '^(.+?)(AtL[0-%d])$' % ( layerCount - 1))
def tokenTree2Xml():
    global iemlExpressionToken, xmlStrList, jsonDict, iemlExpressionString

    #######################################################
    def _tokenTree2Xml( token, jsonD):
        global xmlStrList

        #######################################################
        def iemlBSoloToXmlAttVal( solo):
            """
            Turn a solo into text for an XML attribute value.

            "solo" may be a frozenset, a set, an IemlSolo instance (whose
            .binscs is used), or None.  None is returned unchanged.

            Otherwise the members are sorted and each one is written as a
            line of two-digit lowercase hex numbers (one per item of the
            member); the result starts with a newline and every line ends
            with a newline.

            Any other type is reported as an internal error and the program
            exits with status 1.
            """
            if isinstance( solo, ( frozenset, set)):
                members = solo

            elif isinstance( solo, IemlSolo):
                members = solo.binscs

            elif solo is None:
                return None

            else:
                errMsg( 'internal error: solo is of unexpected type "%s"; "%s"' % (
                    type( solo),
                    repr( solo),
                ))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)

            hexLines = [
                '%s\n' % ( ''.join( [ '%02x' % ( c) for c in binsc]))
                for binsc in sorted( list( members))
            ]
            return '\n' + ''.join( hexLines)

        #######################################################
        def iemlBSoloToJsonVal( solo):
            ## Turn a solo into a value for the JSON output: the sorted list
            ## of the members of a frozenset, or the text
            ## 'could not be calculated' when solo is None.  Anything else is
            ## reported as an internal error and the program exits with
            ## status 1.
            ## NOTE(review): iemlBSoloToXmlAttVal also accepts a set or an
            ## IemlSolo instance; this function does not -- confirm whether
            ## the difference is intended.
            if solo is None:
                return 'could not be calculated'

            if isinstance( solo, frozenset):
                return sorted( list( solo))

            errMsg( 'internal error: solo is of unexpected type "%s"; "%s"' % (
                type( solo),
                repr( solo),
            ))
            if tokenTweakDebug:
                import pdb
                pdb.set_trace()
            sys.exit( 1)

        #######################################################
        def attributeList( token, jsonD):
            """
            Return the XML attribute text for "token" and, as a side effect,
            record the same information in the dictionary "jsonD".

            The returned string is the individual name="value" pieces joined
            by single spaces.  Which pieces appear depends on which
            attributes the token actually has and on the globals tokenDisp,
            firstLast and binary.  (The tokenDisp piece has no JSON
            counterpart.)
            """
            global xmlStrList, tokenDisp, iemlExpressionString, binary

            attList = []

            ## tokenDisp attribute
            if tokenDisp and hasattr( token, 'first'):
                attList.append(
                    ' tokenDisp="%s"\n' % (
                        showPlaceInString(
                            iemlExpressionString,
                            token.first,
                            token.last,
                            protectNewlines = prettyXml,  ## if we're doing prettyXml processing, protect the newlines.  Otherwise not.
                            xmlEscapeForAttributeValue = True,
                        )
                    )
                )

            ## lots of attributes
            ## each attributeNameTuple[ 0] is the token's pyAttName, [ 1] is the XML attname and JSON dictkey
            attributeNameTuples = []
            if firstLast:
                attributeNameTuples.extend( [
                    ( 'first', 'first'),
                    ( 'last', 'last'),
                    ( 'opFirst', 'opFirst'),
                    ( 'opLast', 'opLast'),
                    ( 'piFirst', 'piFirst'),
                    ( 'piLast', 'piLast'),
                    ( 'symFirst', 'symFirst'),
                    ( 'symLast', 'symLast'),
                ])

            attributeNameTuples.extend( [
                ( 'layerMark', 'layerMark'),
                ( 'layerNumber', 'layerNumber'),
                ( 'operator', 'operator'),
                ( 'parameterIdentifier', 'parameterIdentifier'),
                ( 'roleNumber', 'roleNumber'),
                ( 'implicit', 'implicit'),
                ( 'symbol', 'symbol'),
            ])
            for pyAttName, outputName in attributeNameTuples:
                if hasattr( token, pyAttName):
                    attValue = getattr( token, pyAttName)
                    attList.append(
                        '%s="%s"\n' % (
                            outputName,
                            xmlifyAttValue( attValue),
                        )
                    )
                    ## the JSON side always gets the string form of the value
                    jsonD[ outputName] = ( '%s' % ( attValue))

            ## binary attribute
            if binary and hasattr( token, 'binary'):
                attList.append(
                    'binary="%s"' % (
                        xmlifyAttValue(
                            iemlBSoloToXmlAttVal(
                                token.binary,
                            ),
                            protectNewlines = prettyXml,  ## if we're doing prettyXml processing, protect the newlines.  Otherwise not.
                        )
                    )
                )
                jsonD[ 'binary'] = iemlBSoloToJsonVal( token.binary)

            ## containedIn attribute
            if hasattr( token, 'containedIn') and len( token.containedIn) > 0:

                xmlContainedInList = []
                jsonContainedInList = []
                jsonD[ 'containedIn'] = jsonContainedInList

                for containerToken in token.containedIn:
                    containerTypeName = containerToken.tokenTypeName()

                    ## A container need not have every one of these
                    ## attributes; a missing one is reported as None.
                    ## getattr() with a default hides only the "attribute is
                    ## missing" case, not every possible exception as the
                    ## former bare "except:" clauses did.
                    parameterIdentifier = getattr( containerToken, 'parameterIdentifier', None)
                    first = getattr( containerToken, 'first', None)
                    last = getattr( containerToken, 'last', None)
                    piFirst = getattr( containerToken, 'piFirst', None)
                    piLast = getattr( containerToken, 'piLast', None)

                    jsonC2 = {}  ## a weeny little token
                    jsonContainedInList.append( jsonC2)
                    jsonC2[ 'type'] = containerTypeName
                    jsonC2[ 'parameterIdentifier'] = parameterIdentifier
                    if firstLast:
                        jsonC2[ 'first'] = first
                        jsonC2[ 'last'] = last
                        jsonC2[ 'piFirst'] = piFirst
                        jsonC2[ 'piLast'] = piLast

                    ## the XML side always lists all six fields, whatever
                    ## firstLast says
                    xmlContainedInList.append(
                        xmlifyAttValue(
                            '%s %s %s %s %s %s' % (
                                containerTypeName,
                                parameterIdentifier,
                                first,
                                last,
                                piFirst,
                                piLast,
                            ),
                        ),
                    )
                attList.append( 'containedIn="%s"\n' % ( ' '.join( xmlContainedInList)))

            ## primitiveSetAttribute
            if hasattr( token, 'primitiveSet'):
                sortedPrimitives = sorted( list( token.primitiveSet), key=primitiveOrder)
                ## the XML attribute value is the Python list display of the sorted primitives
                attList.append( 'primitiveSet="%s"\n' % ( sortedPrimitives))
                jsonD[ 'primitiveSet'] = sortedPrimitives

            return ' '.join( attList)

        #######################################################


        jsonD[ 'type'] = token.tokenTypeName()

        if token.tokenTypeName() == 'whitespace':
            xmlStrList.append( '<whitespace>%s</whitespace\n>' % ( xmlifyContent( token.text)))
            jsonD[ 'text'] = token.text
            return

        elif token.tokenTypeName() == 'comment':
            xmlStrList.append( '<comment>%s</comment\n>' % ( xmlifyContent( token.text)))
            jsonD[ 'text'] = token.text
            return

        elif token.tokenTypeName() == 'instantiator':
            xmlStrList.append( '<instantiator>')
            jsonD[ 'instantiatorContent'] = []
            for iToken in token.content:
                if iToken.tokenTypeName() == 'instantiatorContent':
                    xmlStrList.append( iToken.text)
                    jsonD[ 'instantiatorContent'].append( iToken.text)
                elif iToken.tokenTypeName() == 'comment':
                    jsonD[ 'instantiatorContent'].append( {})
                    _tokenTree2Xml( iToken, jsonD[ 'instantiatorContent'][ -1])
                else:
                    errMsg( 'internal error')
                    if tokenTweakDebug:
                        import pdb
                        pdb.set_trace()
                    sys.exit( 1)
            xmlStrList.append( '</instantiator\n>')
            return

        xmlStrList.append( '<%s %s>' % (
            token.tokenTypeName(),
            attributeList( token, jsonD),
        ))
        
        for ciwAttributeName in [
            'preGenOpCIW',
            'postSeme1CIW',
            'postSeme2CIW',
            'postSeme3CIW',
            'postGenOpCIW',
            'preNonGenOpCIW',
            'preOperatorCIW',
            'postOperatorCIW',
            'postNonGenOpCIW',
            'postParameterIdentifierCIW',
        ]:
            if hasattr( token, ciwAttributeName):
                xmlStrList.append( '<%s>' % ( ciwAttributeName))
                for ciwToken in getattr( token, ciwAttributeName):
                    if ciwAttributeName not in jsonD:
                        jsonD[ ciwAttributeName] = []
                    jsonD[ ciwAttributeName].append( {})
                    _tokenTree2Xml( ciwToken, jsonD[ ciwAttributeName][ -1])
                xmlStrList.append( '</%s\n>' % ( ciwAttributeName))

        if token.tokenTypeName() == 'usl':
            for layerNumber in range( layerCount):
                gi = 'complexAtL%d' % ( layerNumber)
                if hasattr( token, gi):
                    xmlStrList.append(
                        '<%s>' % ( gi),
                    )
                    jsonD[ gi] = []
                    for catToken in getattr( token, gi):
                        jsonD[ gi].append( {})
                        _tokenTree2Xml( catToken, jsonD[ gi][ -1])
                    xmlStrList.append(
                        '</%s\n>' % ( gi),
                    )
        if hasattr( token, 'operator'):  ## it's a nonGenOp
            jsonD[ 'operator'] = []
            for opToken in token.operands:
                jsonD[ 'operator'].append( {})
                _tokenTree2Xml( opToken, jsonD[ 'operator'][ -1])
        
        if hasattr( token, 'semes'):  ## it's a genOpAtL1-6
            jsonD[ 'semes'] = []
            for semeToken in token.semes:
                jsonD[ 'semes'].append( {})
                _tokenTree2Xml( semeToken, jsonD[ 'semes'][ -1])
        
        if (
            token.tokenTypeName().startswith( 'group') or
            token.tokenTypeName().startswith( 'categoryExpression') or
            token.tokenTypeName().startswith( 'undeterminedSubsetOf') or
            token.tokenTypeName().startswith( 'diagonal')
        ):
            jsonD[ 'content'] = []
            for containerToken in token.zubTokenLists[ -1]:
                jsonD[ 'content'].append( {})
                _tokenTree2Xml( containerToken, jsonD[ 'content'][ -1])

        if token.tokenTypeName() == 'comment':
            jsonD[ 'text'] = token.text
            xmlStrList.append( xmlifyContent( token.text))

        if token.tokenTypeName() == 'instantiator':
            pass

        xmlStrList.append( '</%s\n>' % (
            token.tokenTypeName(),
        ))
    #######################################################

    xmlStrList = []
    
    jsonDict = {}

    xmlStrList.append( '<ieml ')
    jsonDict[ 'type'] = 'ieml'

    if hasattr( iemlExpressionToken, 'prologueText'):
        xmlStrList.append( ' prologue="%s"\n' % (
            xmlifyAttValue( iemlExpressionToken.prologueText),
        ))
        jsonDict[ 'prologue'] = iemlExpressionToken.prologueText

    if hasattr( iemlExpressionToken, 'epilogueText'):
        xmlStrList.append( ' epilogue="%s"\n' % (
            xmlifyAttValue( iemlExpressionToken.epilogueText),
        ))
        jsonDict[ 'epilogue'] = iemlExpressionToken.epilogueText

    xmlStrList.append( ' parser="%s %s"\n' % (
        xmlifyAttValue( os.path.split( sys.argv[ 0])[ 1]),
        xmlifyAttValue( STARPARSER_VERSION),
    ))
    jsonDict[ 'parser'] = '%s' % (
        (
            '%s %s' % ( 
                os.path.split( sys.argv[ 0])[ 1],
                STARPARSER_VERSION,
            )
        )
    )

    xmlStrList.append( ' expression="%s"\n' % (
        xmlifyAttValue( iemlExpressionString[ 1:])  ## remove the artificial leading newline
    ))
    jsonDict[ 'expression'] = '%s' % (
        iemlExpressionString[ 1:].rstrip()  ## remove the artificial leading newline.  Strip any trailing whitespace (per Justin McKillican's request)
    )

    jsonDict[ 'star'] = {}

    xmlStrList.append( '>')
    _tokenTree2Xml( iemlExpressionToken, jsonDict[ 'star'])  ## this appends everything to xmlStrList, which is global
    xmlStrList.append( '</ieml>\n')

    return ''.join( xmlStrList)

#######################################################
def completeOrEmptyTokenAtLayer( layerNumber, completeOrEmpty, first, last,):
    """
Return a complete or empty token at the given layer.

layerNumber is an integer, 0-6.
completeOrEmpty is either 'I' (complete) or 'E' (empty).
first and last are handed unchanged to the Token constructor of the new
token and of every token nested inside it.

A layer-0 token receives its primitiveSet (looked up in
L0SymbolsToPrimitiveSets) and its symbol from completeOrEmpty.
A token at any higher layer receives three semes, with roleNumber 1, 2
and 3, each of which is built recursively one layer further down.
    """
    token = Token(
        'genOpAtL%d' % ( layerNumber),
        [],
        first = first,
        last = last,
        layerNumber = layerNumber,
    )

    ## Base case: layer 0 has no semes, only a primitive set and a symbol.
    if layerNumber == 0:
        token.primitiveSet = L0SymbolsToPrimitiveSets[ completeOrEmpty]
        token.symbol = completeOrEmpty
        return token

    ## Only create the seme list if the Token constructor did not already
    ## provide one; an existing list is appended to, not replaced.
    if not hasattr( token, 'semes'):
        token.semes = []

    ## Fill the three roles with identical complete/empty tokens from the
    ## next layer down.
    for roleNumber in range( 1, 4):
        seme = completeOrEmptyTokenAtLayer( layerNumber - 1, completeOrEmpty, first, last)
        token.semes.append( seme)
        seme.roleNumber = roleNumber

    return token

#######################################################
def addImplicitSemesToEverything():
    """
Append the missing semes to every genOpAtL<n> token found in
tokensDict[ 'tokenType'], working from layer layerCount - 1 down to layer 1.

Tokens that already have exactly three semes are left alone.  For the
others, the layer mark of the last existing seme selects the filler:

    no layer mark, or an ordinary (non-"fillWith") layer mark:
        fill with empty ('E') tokens         A:.  -> A:E:E:.
    '~':
        fill with complete ('I') tokens      A~.  -> A:I:I:.
    '!':
        fill with copies of the last seme    A!.  -> A:A:A:.

Every seme added here gets the next free roleNumber (up to 3) and is
flagged with implicit = True.  Any other layer mark is reported as an
internal error and the process exits with status 1.
    """
    for layerNumber in range( layerCount - 1, 0, -1):
        typeChar = tokenTypeNameToRegExpableChar[ 'genOpAtL%d' % ( layerNumber)]
        if typeChar not in tokensDict[ 'tokenType']:
            continue

        for tokenId in tokensDict[ 'tokenType'][ typeChar]:
            token = tokensDict[ 'tokenType'][ typeChar][ tokenId]
            if len( token.semes) == 3:
                continue

            lastSeme = token.semes[ -1]
            ## so if there is 1 existing seme, the first role to fill is 2
            firstMissingRole = len( token.semes) + 1

            hasFillWithMark = (
                hasattr( lastSeme, 'layerMark') and
                lastSeme.layerMark in (
                    star_fillWithCompletenessLayerMark,
                    star_fillWithPrecedingSemeLayerMark,
                )
            )

            if not hasFillWithMark:
                ## Either there is no layer mark at all or it is a real one
                ## (not one of the "fillWith" layer marks): fill with E.
                if hasattr( lastSeme, 'layerMarkTokens'):
                    fillerFirst = lastSeme.layerMarkTokens[ 0].first
                else:
                    fillerFirst = token.layerMarkTokens[ -1].last
                for roleNumber in range( firstMissingRole, 4):
                    filler = completeOrEmptyTokenAtLayer(
                        layerNumber - 1,
                        'E',
                        fillerFirst,
                        token.layerMarkTokens[ -1].last,
                    )
                    token.semes.append( filler)
                    filler.roleNumber = roleNumber
                    filler.implicit = True

            ## NOTE(review): the test above uses the star_fillWith... constants
            ## while the two tests below use literal characters -- confirm
            ## that they are meant to stay in step.
            elif lastSeme.layerMark == '~':
                ## fill with I
                for roleNumber in range( firstMissingRole, 4):
                    filler = completeOrEmptyTokenAtLayer(
                        layerNumber - 1,
                        'I',
                        lastSeme.layerMarkTokens[ 0].first,
                        token.layerMarkTokens[ -1].last,
                    )
                    token.semes.append( filler)
                    filler.roleNumber = roleNumber
                    filler.implicit = True

            elif lastSeme.layerMark == '!':
                ## duplicate the last existing seme until the roles are filled
                for roleNumber in range( firstMissingRole, 4):
                    duplicate = lastSeme.copy()
                    token.semes.append( duplicate)
                    duplicate.first = lastSeme.layerMarkTokens[ 0].first
                    duplicate.last = token.layerMarkTokens[ -1].last
                    duplicate.roleNumber = roleNumber
                    duplicate.implicit = True

            else:
                errMsg( 'internal error: lastSeme.layerMark == "%s"' % ( lastSeme.layerMark))
                if tokenTweakDebug:
                    import pdb
                    pdb.set_trace()
                sys.exit( 1)


#####################################################
################ STARPARSER ENDS HERE ###############
#####################################################


##################################
### ENCODING STUFF BEGINS HERE ###
##################################
#######################################################
IANAEncodingsDict = { ## tuple[ 1] is True if it is the preferred MIME name for the encoding
                            'Adobe-Symbol-Encoding': [
                                                          ('Adobe-Symbol-Encoding', False),
                                                          ('csHPPSMath', False),
                                                     ],
                                   'ANSI_X3.4-1968': [
                                                          ('ANSI_X3.4-1968', False),
                                                          ('ANSI_X3.4-1986', False),
                                                          ('ASCII', False),
                                                          ('cp367', False),
                                                          ('csASCII', False),
                                                          ('IBM367', False),
                                                          ('iso-ir-6', False),
                                                          ('ISO646-US', False),
                                                          ('ISO_646.irv:1991', False),
                                                          ('us', False),
                                                          ('US-ASCII', True),
                                                     ],
                                 'ANSI_X3.110-1983': [
                                                          ('ANSI_X3.110-1983', False),
                                                          ('CSA_T500-1983', False),
                                                          ('csISO99NAPLPS', False),
                                                          ('iso-ir-99', False),
                                                          ('NAPLPS', False),
                                                     ],
                          'Adobe-Standard-Encoding': [
                                                          ('Adobe-Standard-Encoding', False),
                                                          ('csAdobeStandardEncoding', False),
                                                     ],
                                         'ASMO_449': [
                                                          ('arabic7', False),
                                                          ('ASMO_449', False),
                                                          ('csISO89ASMO449', False),
                                                          ('iso-ir-89', False),
                                                          ('ISO_9036', False),
                                                     ],
                                       'Amiga-1251': [
                                                          ('Ami-1251', False),
                                                          ('Ami1251', False),
                                                          ('Amiga-1251', False),
                                                          ('Amiga1251', False),
                                                     ],
                                       'Big5-HKSCS': [
                                                          ('Big5-HKSCS', False),
                                                     ],
                                             'Big5': [
                                                          ('Big5', True),
                                                          ('csBig5', False),
                                                     ],
                                      'BS_viewdata': [
                                                          ('BS_viewdata', False),
                                                          ('csISO47BSViewdata', False),
                                                          ('iso-ir-47', False),
                                                     ],
                                              'BRF': [
                                                          ('BRF', False),
                                                          ('csBRF', False),
                                                     ],
                                          'BS_4730': [
                                                          ('BS_4730', False),
                                                          ('csISO4UnitedKingdom', False),
                                                          ('gb', False),
                                                          ('iso-ir-4', False),
                                                          ('ISO646-GB', False),
                                                          ('uk', False),
                                                     ],
                                           'BOCU-1': [
                                                          ('BOCU-1', False),
                                                          ('csBOCU-1', False),
                                                     ],
                                       'CSN_369103': [
                                                          ('csISO139CSN369103', False),
                                                          ('CSN_369103', False),
                                                          ('iso-ir-139', False),
                                                     ],
                                           'CESU-8': [
                                                          ('CESU-8', False),
                                                          ('csCESU-8', False),
                                                     ],
                               'CSA_Z243.4-1985-gr': [
                                                          ('CSA_Z243.4-1985-gr', False),
                                                          ('csISO123CSAZ24341985gr', False),
                                                          ('iso-ir-123', False),
                                                     ],
                                'CSA_Z243.4-1985-1': [
                                                          ('ca', False),
                                                          ('csa7-1', False),
                                                          ('CSA_Z243.4-1985-1', False),
                                                          ('csISO121Canadian1', False),
                                                          ('iso-ir-121', False),
                                                          ('ISO646-CA', False),
                                                     ],
                                'CSA_Z243.4-1985-2': [
                                                          ('csa7-2', False),
                                                          ('CSA_Z243.4-1985-2', False),
                                                          ('csISO122Canadian2', False),
                                                          ('iso-ir-122', False),
                                                          ('ISO646-CA2', False),
                                                     ],
                                            'dk-us': [
                                                          ('csDKUS', False),
                                                          ('dk-us', False),
                                                     ],
                                        'DIN_66003': [
                                                          ('csISO21German', False),
                                                          ('de', False),
                                                          ('DIN_66003', False),
                                                          ('iso-ir-21', False),
                                                          ('ISO646-DE', False),
                                                     ],
                                          'DS_2089': [
                                                          ('csISO646Danish', False),
                                                          ('dk', False),
                                                          ('DS2089', False),
                                                          ('DS_2089', False),
                                                          ('ISO646-DK', False),
                                                     ],
                                          'DEC-MCS': [
                                                          ('csDECMCS', False),
                                                          ('dec', False),
                                                          ('DEC-MCS', False),
                                                     ],
                                   'EBCDIC-AT-DE-A': [
                                                          ('csEBCDICATDEA', False),
                                                          ('EBCDIC-AT-DE-A', False),
                                                     ],
                                        'EBCDIC-IT': [
                                                          ('csEBCDICIT', False),
                                                          ('EBCDIC-IT', False),
                                                     ],
                                     'EBCDIC-CA-FR': [
                                                          ('csEBCDICCAFR', False),
                                                          ('EBCDIC-CA-FR', False),
                                                     ],
                                     'EBCDIC-DK-NO': [
                                                          ('csEBCDICDKNO', False),
                                                          ('EBCDIC-DK-NO', False),
                                                     ],
                                        'EBCDIC-US': [
                                                          ('csEBCDICUS', False),
                                                          ('EBCDIC-US', False),
                                                     ],
                                        'EBCDIC-ES': [
                                                          ('csEBCDICES', False),
                                                          ('EBCDIC-ES', False),
                                                     ],
                                   'EBCDIC-FI-SE-A': [
                                                          ('csEBCDICFISEA', False),
                                                          ('EBCDIC-FI-SE-A', False),
                                                     ],
                                      'EBCDIC-ES-S': [
                                                          ('csEBCDICESS', False),
                                                          ('EBCDIC-ES-S', False),
                                                     ],
                                     'EBCDIC-AT-DE': [
                                                          ('csIBMEBCDICATDE', False),
                                                          ('EBCDIC-AT-DE', False),
                                                     ],
                                               'ES': [
                                                          ('csISO17Spanish', False),
                                                          ('ES', False),
                                                          ('iso-ir-17', False),
                                                          ('ISO646-ES', False),
                                                     ],
                                           'EUC-KR': [
                                                          ('csEUCKR', False),
                                                          ('EUC-KR', True),
                                                     ],
                                     'EBCDIC-FI-SE': [
                                                          ('csEBCDICFISE', False),
                                                          ('EBCDIC-FI-SE', False),
                                                     ],
                                        'EBCDIC-PT': [
                                                          ('csEBCDICPT', False),
                                                          ('EBCDIC-PT', False),
                                                     ],
                                      'EBCDIC-ES-A': [
                                                          ('csEBCDICESA', False),
                                                          ('EBCDIC-ES-A', False),
                                                     ],
      'Extended_UNIX_Code_Fixed_Width_for_Japanese': [
                                                          ('csEUCFixWidJapanese', False),
                                                          ('Extended_UNIX_Code_Fixed_Width_for_Japanese', False),
                                                     ],
                                   'EBCDIC-DK-NO-A': [
                                                          ('csEBCDICDKNOA', False),
                                                          ('EBCDIC-DK-NO-A', False),
                                                     ],
                                        'EBCDIC-UK': [
                                                          ('csEBCDICUK', False),
                                                          ('EBCDIC-UK', False),
                                                     ],
                                              'ES2': [
                                                          ('csISO85Spanish2', False),
                                                          ('ES2', False),
                                                          ('iso-ir-85', False),
                                                          ('ISO646-ES2', False),
                                                     ],
                                    'ECMA-cyrillic': [
                                                          ('csISO111ECMACyrillic', False),
                                                          ('ECMA-cyrillic', False),
                                                          ('iso-ir-111', False),
                                                          ('KOI8-E', False),
                                                     ],
    'Extended_UNIX_Code_Packed_Format_for_Japanese': [
                                                          ('csEUCPkdFmtJapanese', False),
                                                          ('EUC-JP', True),
                                                          ('Extended_UNIX_Code_Packed_Format_for_Japanese', False),
                                                     ],
                                        'EBCDIC-FR': [
                                                          ('csEBCDICFR', False),
                                                          ('EBCDIC-FR', False),
                                                     ],
                                    'GOST_19768-74': [
                                                          ('csISO153GOST1976874', False),
                                                          ('GOST_19768-74', False),
                                                          ('iso-ir-153', False),
                                                          ('ST_SEV_358-88', False),
                                                     ],
                                          'GB18030': [
                                                          ('GB18030', False),
                                                     ],
                                       'GB_2312-80': [
                                                          ('chinese', False),
                                                          ('csISO58GB231280', False),
                                                          ('GB_2312-80', False),
                                                          ('iso-ir-58', False),
                                                     ],
                                       'greek7-old': [
                                                          ('csISO18Greek7Old', False),
                                                          ('greek7-old', False),
                                                          ('iso-ir-18', False),
                                                     ],
                                              'GBK': [
                                                          ('CP936', False),
                                                          ('GBK', False),
                                                          ('MS936', False),
                                                          ('windows-936', False),
                                                     ],
                                      'greek-ccitt': [
                                                          ('csISO150', False),
                                                          ('csISO150GreekCCITT', False),
                                                          ('greek-ccitt', False),
                                                          ('iso-ir-150', False),
                                                     ],
                                           'greek7': [
                                                          ('csISO88Greek7', False),
                                                          ('greek7', False),
                                                          ('iso-ir-88', False),
                                                     ],
                                           'GB2312': [
                                                          ('csGB2312', False),
                                                          ('GB2312', True),
                                                     ],
                                       'GB_1988-80': [
                                                          ('cn', False),
                                                          ('csISO57GB1988', False),
                                                          ('GB_1988-80', False),
                                                          ('iso-ir-57', False),
                                                          ('ISO646-CN', False),
                                                     ],
                                       'HP-Pi-font': [
                                                          ('csHPPiFont', False),
                                                          ('HP-Pi-font', False),
                                                     ],
                                         'HP-Legal': [
                                                          ('csHPLegal', False),
                                                          ('HP-Legal', False),
                                                     ],
                                        'hp-roman8': [
                                                          ('csHPRoman8', False),
                                                          ('hp-roman8', False),
                                                          ('r8', False),
                                                          ('roman8', False),
                                                     ],
                                         'HP-Math8': [
                                                          ('csHPMath8', False),
                                                          ('HP-Math8', False),
                                                     ],
                                       'HP-DeskTop': [
                                                          ('csHPDesktop', False),
                                                          ('HP-DeskTop', False),
                                                     ],
                                       'HZ-GB-2312': [
                                                          ('HZ-GB-2312', False),
                                                     ],
                                         'IBM01141': [
                                                          ('CCSID01141', False),
                                                          ('CP01141', False),
                                                          ('ebcdic-de-273+euro', False),
                                                          ('IBM01141', False),
                                                     ],
                                  'ISO_8859-9:1989': [
                                                          ('csISOLatin5', False),
                                                          ('ISO-8859-9', True),
                                                          ('iso-ir-148', False),
                                                          ('ISO_8859-9', False),
                                                          ('ISO_8859-9:1989', False),
                                                          ('l5', False),
                                                          ('latin5', False),
                                                     ],
                                        'IEC_P27-1': [
                                                          ('csISO143IECP271', False),
                                                          ('IEC_P27-1', False),
                                                          ('iso-ir-143', False),
                                                     ],
                                      'ISO-11548-1': [
                                                          ('csISO115481', False),
                                                          ('ISO-11548-1', False),
                                                          ('ISO_11548-1', False),
                                                          ('ISO_TR_11548-1', False),
                                                     ],
                                           'IBM865': [
                                                          ('865', False),
                                                          ('cp865', False),
                                                          ('csIBM865', False),
                                                          ('IBM865', False),
                                                     ],
                                        'INVARIANT': [
                                                          ('csINVARIANT', False),
                                                          ('INVARIANT', False),
                                                     ],
                                         'IBM01147': [
                                                          ('CCSID01147', False),
                                                          ('CP01147', False),
                                                          ('ebcdic-fr-297+euro', False),
                                                          ('IBM01147', False),
                                                     ],
                                           'IBM891': [
                                                          ('cp891', False),
                                                          ('csIBM891', False),
                                                          ('IBM891', False),
                                                     ],
                                         'IBM01148': [
                                                          ('CCSID01148', False),
                                                          ('CP01148', False),
                                                          ('ebcdic-international-500+euro', False),
                                                          ('IBM01148', False),
                                                     ],
                                           'IBM424': [
                                                          ('cp424', False),
                                                          ('csIBM424', False),
                                                          ('ebcdic-cp-he', False),
                                                          ('IBM424', False),
                                                     ],
                                             'INIS': [
                                                          ('csISO49INIS', False),
                                                          ('INIS', False),
                                                          ('iso-ir-49', False),
                                                     ],
                                           'IBM775': [
                                                          ('cp775', False),
                                                          ('csPC775Baltic', False),
                                                          ('IBM775', False),
                                                     ],
                                         'IBM00924': [
                                                          ('CCSID00924', False),
                                                          ('CP00924', False),
                                                          ('ebcdic-Latin9--euro', False),
                                                          ('IBM00924', False),
                                                     ],
                                           'INIS-8': [
                                                          ('csISO50INIS8', False),
                                                          ('INIS-8', False),
                                                          ('iso-ir-50', False),
                                                     ],
                                     'ISO_8859-6-I': [
                                                          ('csISO88596I', False),
                                                          ('ISO-8859-6-I', True),
                                                          ('ISO_8859-6-I', False),
                                                     ],
                                         'ISO_5427': [
                                                          ('csISO5427Cyrillic', False),
                                                          ('iso-ir-37', False),
                                                          ('ISO_5427', False),
                                                     ],
                                  'ISO_8859-8:1988': [
                                                          ('csISOLatinHebrew', False),
                                                          ('hebrew', False),
                                                          ('ISO-8859-8', True),
                                                          ('iso-ir-138', False),
                                                          ('ISO_8859-8', False),
                                                          ('ISO_8859-8:1988', False),
                                                     ],
                                     'ISO_8859-6-E': [
                                                          ('csISO88596E', False),
                                                          ('ISO-8859-6-E', True),
                                                          ('ISO_8859-6-E', False),
                                                     ],
                                  'ISO_8859-4:1988': [
                                                          ('csISOLatin4', False),
                                                          ('ISO-8859-4', True),
                                                          ('iso-ir-110', False),
                                                          ('ISO_8859-4', False),
                                                          ('ISO_8859-4:1988', False),
                                                          ('l4', False),
                                                          ('latin4', False),
                                                     ],
                         'ISO-10646-Unicode-Latin1': [
                                                          ('csUnicodeLatin1', False),
                                                          ('ISO-10646', False),
                                                          ('ISO-10646-Unicode-Latin1', False),
                                                     ],
                                    'ISO_6937-2-25': [
                                                          ('csISO6937Add', False),
                                                          ('iso-ir-152', False),
                                                          ('ISO_6937-2-25', False),
                                                     ],
                                           'IBM868': [
                                                          ('cp-ar', False),
                                                          ('CP868', False),
                                                          ('csIBM868', False),
                                                          ('IBM868', False),
                                                     ],
                                           'IBM869': [
                                                          ('869', False),
                                                          ('cp-gr', False),
                                                          ('cp869', False),
                                                          ('csIBM869', False),
                                                          ('IBM869', False),
                                                     ],
                                         'IBM-Thai': [
                                                          ('csIBMThai', False),
                                                          ('IBM-Thai', False),
                                                     ],
                                           'IBM864': [
                                                          ('cp864', False),
                                                          ('csIBM864', False),
                                                          ('IBM864', False),
                                                     ],
                                    'ISO_2033-1983': [
                                                          ('csISO2033', False),
                                                          ('e13b', False),
                                                          ('iso-ir-98', False),
                                                          ('ISO_2033-1983', False),
                                                     ],
                                           'IBM862': [
                                                          ('862', False),
                                                          ('cp862', False),
                                                          ('csPC862LatinHebrew', False),
                                                          ('IBM862', False),
                                                     ],
                                           'IBM860': [
                                                          ('860', False),
                                                          ('cp860', False),
                                                          ('csIBM860', False),
                                                          ('IBM860', False),
                                                     ],
                                           'IBM861': [
                                                          ('861', False),
                                                          ('cp-is', False),
                                                          ('cp861', False),
                                                          ('csIBM861', False),
                                                          ('IBM861', False),
                                                     ],
                                           'IBM866': [
                                                          ('866', False),
                                                          ('cp866', False),
                                                          ('csIBM866', False),
                                                          ('IBM866', False),
                                                     ],
                   'ISO-8859-1-Windows-3.1-Latin-1': [
                                                          ('csWindows31Latin1', False),
                                                          ('ISO-8859-1-Windows-3.1-Latin-1', False),
                                                     ],
                                           'IBM852': [
                                                          ('852', False),
                                                          ('cp852', False),
                                                          ('csPCp852', False),
                                                          ('IBM852', False),
                                                     ],
                                    'ISO_5427:1981': [
                                                          ('iso-ir-54', False),
                                                          ('ISO5427Cyrillic1981', False),
                                                          ('ISO_5427:1981', False),
                                                     ],
                                         'IBM01146': [
                                                          ('CCSID01146', False),
                                                          ('CP01146', False),
                                                          ('ebcdic-gb-285+euro', False),
                                                          ('IBM01146', False),
                                                     ],
                                      'IBM-Symbols': [
                                                          ('csIBMSymbols', False),
                                                          ('IBM-Symbols', False),
                                                     ],
                                           'IBM918': [
                                                          ('CP918', False),
                                                          ('csIBM918', False),
                                                          ('ebcdic-cp-ar2', False),
                                                          ('IBM918', False),
                                                     ],
                                    'ISO_10367-box': [
                                                          ('csISO10367Box', False),
                                                          ('iso-ir-155', False),
                                                          ('ISO_10367-box', False),
                                                     ],
                                  'ISO_8859-1:1987': [
                                                          ('CP819', False),
                                                          ('csISOLatin1', False),
                                                          ('IBM819', False),
                                                          ('ISO-8859-1', True),
                                                          ('iso-ir-100', False),
                                                          ('ISO_8859-1', False),
                                                          ('ISO_8859-1:1987', False),
                                                          ('l1', False),
                                                          ('latin1', False),
                                                     ],
                                           'IBM905': [
                                                          ('CP905', False),
                                                          ('csIBM905', False),
                                                          ('ebcdic-cp-tr', False),
                                                          ('IBM905', False),
                                                     ],
                                           'IBM904': [
                                                          ('904', False),
                                                          ('cp904', False),
                                                          ('csIBBM904', False),
                                                          ('IBM904', False),
                                                     ],
                                      'ISO-8859-16': [
                                                          ('ISO-8859-16', False),
                                                          ('iso-ir-226', False),
                                                          ('ISO_8859-16', False),
                                                          ('ISO_8859-16:2001', False),
                                                          ('l10', False),
                                                          ('latin10', False),
                                                     ],
                                           'IBM871': [
                                                          ('CP871', False),
                                                          ('csIBM871', False),
                                                          ('ebcdic-cp-is', False),
                                                          ('IBM871', False),
                                                     ],
                                      'ISO-8859-15': [
                                                          ('ISO-8859-15', False),
                                                          ('ISO_8859-15', False),
                                                          ('Latin-9', False),
                                                     ],
                                    'ISO_5428:1980': [
                                                          ('csISO5428Greek', False),
                                                          ('iso-ir-55', False),
                                                          ('ISO_5428:1980', False),
                                                     ],
                                  'ISO_8859-5:1988': [
                                                          ('csISOLatinCyrillic', False),
                                                          ('cyrillic', False),
                                                          ('ISO-8859-5', True),
                                                          ('iso-ir-144', False),
                                                          ('ISO_8859-5', False),
                                                          ('ISO_8859-5:1988', False),
                                                     ],
                                         'IBM00858': [
                                                          ('CCSID00858', False),
                                                          ('CP00858', False),
                                                          ('IBM00858', False),
                                                          ('PC-Multilingual-850+euro', False),
                                                     ],
                                          'IBM1047': [
                                                          ('IBM-1047', False),
                                                          ('IBM1047', False),
                                                     ],
                       'ISO-8859-2-Windows-Latin-2': [
                                                          ('csWindows31Latin2', False),
                                                          ('ISO-8859-2-Windows-Latin-2', False),
                                                     ],
                                           'IBM500': [
                                                          ('CP500', False),
                                                          ('csIBM500', False),
                                                          ('ebcdic-cp-be', False),
                                                          ('ebcdic-cp-ch', False),
                                                          ('IBM500', False),
                                                     ],
                                    'INIS-cyrillic': [
                                                          ('csISO51INISCyrillic', False),
                                                          ('INIS-cyrillic', False),
                                                          ('iso-ir-51', False),
                                                     ],
                                           'IBM273': [
                                                          ('CP273', False),
                                                          ('csIBM273', False),
                                                          ('IBM273', False),
                                                     ],
                                           'IBM277': [
                                                          ('csIBM277', False),
                                                          ('EBCDIC-CP-DK', False),
                                                          ('EBCDIC-CP-NO', False),
                                                          ('IBM277', False),
                                                     ],
                                           'IBM275': [
                                                          ('cp275', False),
                                                          ('csIBM275', False),
                                                          ('EBCDIC-BR', False),
                                                          ('IBM275', False),
                                                     ],
                                           'IBM274': [
                                                          ('CP274', False),
                                                          ('csIBM274', False),
                                                          ('EBCDIC-BE', False),
                                                          ('IBM274', False),
                                                     ],
                                           'IBM903': [
                                                          ('cp903', False),
                                                          ('csIBM903', False),
                                                          ('IBM903', False),
                                                     ],
                                           'IBM278': [
                                                          ('CP278', False),
                                                          ('csIBM278', False),
                                                          ('ebcdic-cp-fi', False),
                                                          ('ebcdic-cp-se', False),
                                                          ('IBM278', False),
                                                     ],
                                         'IBM01140': [
                                                          ('CCSID01140', False),
                                                          ('CP01140', False),
                                                          ('ebcdic-us-37+euro', False),
                                                          ('IBM01140', False),
                                                     ],
                                        'iso-ir-90': [
                                                          ('csISO90', False),
                                                          ('iso-ir-90', False),
                                                     ],
                                         'IBM01142': [
                                                          ('CCSID01142', False),
                                                          ('CP01142', False),
                                                          ('ebcdic-dk-277+euro', False),
                                                          ('ebcdic-no-277+euro', False),
                                                          ('IBM01142', False),
                                                     ],
                                         'IBM01144': [
                                                          ('CCSID01144', False),
                                                          ('CP01144', False),
                                                          ('ebcdic-it-280+euro', False),
                                                          ('IBM01144', False),
                                                     ],
                                         'IBM01145': [
                                                          ('CCSID01145', False),
                                                          ('CP01145', False),
                                                          ('ebcdic-es-284+euro', False),
                                                          ('IBM01145', False),
                                                     ],
                                         'IBM01149': [
                                                          ('CCSID01149', False),
                                                          ('CP01149', False),
                                                          ('ebcdic-is-871+euro', False),
                                                          ('IBM01149', False),
                                                     ],
                   'ISO-8859-1-Windows-3.0-Latin-1': [
                                                          ('csWindows30Latin1', False),
                                                          ('ISO-8859-1-Windows-3.0-Latin-1', False),
                                                     ],
                                           'IBM437': [
                                                          ('437', False),
                                                          ('cp437', False),
                                                          ('csPC8CodePage437', False),
                                                          ('IBM437', False),
                                                     ],
                                           'IBM857': [
                                                          ('857', False),
                                                          ('cp857', False),
                                                          ('csIBM857', False),
                                                          ('IBM857', False),
                                                     ],
                             'ISO-Unicode-IBM-1268': [
                                                          ('csUnicodeIBM1268', False),
                                                          ('ISO-Unicode-IBM-1268', False),
                                                     ],
                                    'ISO_8859-supp': [
                                                          ('csISO8859Supp', False),
                                                          ('iso-ir-154', False),
                                                          ('ISO_8859-supp', False),
                                                          ('latin1-2-5', False),
                                                     ],
                                           'IBM851': [
                                                          ('851', False),
                                                          ('cp851', False),
                                                          ('csIBM851', False),
                                                          ('IBM851', False),
                                                     ],
                                           'IBM850': [
                                                          ('850', False),
                                                          ('cp850', False),
                                                          ('csPC850Multilingual', False),
                                                          ('IBM850', False),
                                                     ],
                             'ISO-Unicode-IBM-1261': [
                                                          ('csUnicodeIBM1261', False),
                                                          ('ISO-Unicode-IBM-1261', False),
                                                     ],
                             'ISO-Unicode-IBM-1264': [
                                                          ('csUnicodeIBM1264', False),
                                                          ('ISO-Unicode-IBM-1264', False),
                                                     ],
                             'ISO-Unicode-IBM-1265': [
                                                          ('csUnicodeIBM1265', False),
                                                          ('ISO-Unicode-IBM-1265', False),
                                                     ],
                                  'ISO_8859-6:1987': [
                                                          ('arabic', False),
                                                          ('ASMO-708', False),
                                                          ('csISOLatinArabic', False),
                                                          ('ECMA-114', False),
                                                          ('ISO-8859-6', True),
                                                          ('iso-ir-127', False),
                                                          ('ISO_8859-6', False),
                                                          ('ISO_8859-6:1987', False),
                                                     ],
                                 'ISO_646.irv:1983': [
                                                          ('csISO2IntlRefVersion', False),
                                                          ('irv', False),
                                                          ('iso-ir-2', False),
                                                          ('ISO_646.irv:1983', False),
                                                     ],
                                           'IBM284': [
                                                          ('CP284', False),
                                                          ('csIBM284', False),
                                                          ('ebcdic-cp-es', False),
                                                          ('IBM284', False),
                                                     ],
                                           'IBM280': [
                                                          ('CP280', False),
                                                          ('csIBM280', False),
                                                          ('ebcdic-cp-it', False),
                                                          ('IBM280', False),
                                                     ],
                                           'IBM281': [
                                                          ('cp281', False),
                                                          ('csIBM281', False),
                                                          ('EBCDIC-JP-E', False),
                                                          ('IBM281', False),
                                                     ],
                                     'ISO_8859-8-E': [
                                                          ('csISO88598E', False),
                                                          ('ISO-8859-8-E', True),
                                                          ('ISO_8859-8-E', False),
                                                     ],
                                     'ISO_8859-8-I': [
                                                          ('csISO88598I', False),
                                                          ('ISO-8859-8-I', True),
                                                          ('ISO_8859-8-I', False),
                                                     ],
                                           'IBM870': [
                                                          ('CP870', False),
                                                          ('csIBM870', False),
                                                          ('ebcdic-cp-roece', False),
                                                          ('ebcdic-cp-yu', False),
                                                          ('IBM870', False),
                                                     ],
                                  'ISO_8859-2:1987': [
                                                          ('csISOLatin2', False),
                                                          ('ISO-8859-2', True),
                                                          ('iso-ir-101', False),
                                                          ('ISO_8859-2', False),
                                                          ('ISO_8859-2:1987', False),
                                                          ('l2', False),
                                                          ('latin2', False),
                                                     ],
                                          'IBM1026': [
                                                          ('CP1026', False),
                                                          ('csIBM1026', False),
                                                          ('IBM1026', False),
                                                     ],
                                           'IBM285': [
                                                          ('CP285', False),
                                                          ('csIBM285', False),
                                                          ('ebcdic-cp-gb', False),
                                                          ('IBM285', False),
                                                     ],
                                    'ISO-10646-J-1': [
                                                          ('ISO-10646-J-1', False),
                                                     ],
                                    'ISO-2022-JP-2': [
                                                          ('csISO2022JP2', False),
                                                          ('ISO-2022-JP-2', True),
                                                     ],
                                  'ISO-2022-CN-EXT': [
                                                          ('ISO-2022-CN-EXT', False),
                                                     ],
                                      'ISO-8859-14': [
                                                          ('ISO-8859-14', False),
                                                          ('iso-celtic', False),
                                                          ('iso-ir-199', False),
                                                          ('ISO_8859-14', False),
                                                          ('ISO_8859-14:1998', False),
                                                          ('l8', False),
                                                          ('latin8', False),
                                                     ],
                                      'ISO-8859-13': [
                                                          ('ISO-8859-13', False),
                                                     ],
                                           'IBM423': [
                                                          ('cp423', False),
                                                          ('csIBM423', False),
                                                          ('ebcdic-cp-gr', False),
                                                          ('IBM423', False),
                                                     ],
                                           'IBM420': [
                                                          ('cp420', False),
                                                          ('csIBM420', False),
                                                          ('ebcdic-cp-ar1', False),
                                                          ('IBM420', False),
                                                     ],
                                      'ISO-8859-10': [
                                                          ('csISOLatin6', False),
                                                          ('ISO-8859-10', True),
                                                          ('iso-ir-157', False),
                                                          ('ISO_8859-10:1992', False),
                                                          ('l6', False),
                                                          ('latin6', False),
                                                     ],
                                           'IBM297': [
                                                          ('cp297', False),
                                                          ('csIBM297', False),
                                                          ('ebcdic-cp-fr', False),
                                                          ('IBM297', False),
                                                     ],
                                           'IBM290': [
                                                          ('cp290', False),
                                                          ('csIBM290', False),
                                                          ('EBCDIC-JP-kana', False),
                                                          ('IBM290', False),
                                                     ],
                                  'ISO-10646-UCS-4': [
                                                          ('csUCS4', False),
                                                          ('ISO-10646-UCS-4', False),
                                                     ],
                                           'IBM037': [
                                                          ('cp037', False),
                                                          ('csIBM037', False),
                                                          ('ebcdic-cp-ca', False),
                                                          ('ebcdic-cp-nl', False),
                                                          ('ebcdic-cp-us', False),
                                                          ('ebcdic-cp-wt', False),
                                                          ('IBM037', False),
                                                     ],
                                  'ISO-10646-UCS-2': [
                                                          ('csUnicode', False),
                                                          ('ISO-10646-UCS-2', False),
                                                     ],
                                  'ISO-10646-UTF-1': [
                                                          ('csISO10646UTF1', False),
                                                          ('ISO-10646-UTF-1', False),
                                                     ],
                                         'IBM01143': [
                                                          ('CCSID01143', False),
                                                          ('CP01143', False),
                                                          ('ebcdic-fi-278+euro', False),
                                                          ('ebcdic-se-278+euro', False),
                                                          ('IBM01143', False),
                                                     ],
                             'ISO-Unicode-IBM-1276': [
                                                          ('csUnicodeIBM1276', False),
                                                          ('ISO-Unicode-IBM-1276', False),
                                                     ],
                                           'IBM863': [
                                                          ('863', False),
                                                          ('cp863', False),
                                                          ('csIBM863', False),
                                                          ('IBM863', False),
                                                     ],
                               'ISO_646.basic:1983': [
                                                          ('csISO646basic1983', False),
                                                          ('ISO_646.basic:1983', False),
                                                          ('ref', False),
                                                     ],
                                   'ISO_6937-2-add': [
                                                          ('csISOTextComm', False),
                                                          ('iso-ir-142', False),
                                                          ('ISO_6937-2-add', False),
                                                     ],
                                      'ISO-2022-JP': [
                                                          ('csISO2022JP', False),
                                                          ('ISO-2022-JP', True),
                                                     ],
                                  'ISO_8859-7:1987': [
                                                          ('csISOLatinGreek', False),
                                                          ('ECMA-118', False),
                                                          ('ELOT_928', False),
                                                          ('greek', False),
                                                          ('greek8', False),
                                                          ('ISO-8859-7', True),
                                                          ('iso-ir-126', False),
                                                          ('ISO_8859-7', False),
                                                          ('ISO_8859-7:1987', False),
                                                     ],
                              'ISO-10646-UCS-Basic': [
                                                          ('csUnicodeASCII', False),
                                                          ('ISO-10646-UCS-Basic', False),
                                                     ],
                                               'IT': [
                                                          ('csISO15Italian', False),
                                                          ('iso-ir-15', False),
                                                          ('ISO646-IT', False),
                                                          ('IT', False),
                                                     ],
                                           'IBM855': [
                                                          ('855', False),
                                                          ('cp855', False),
                                                          ('csIBM855', False),
                                                          ('IBM855', False),
                                                     ],
                                           'IBM038': [
                                                          ('cp038', False),
                                                          ('csIBM038', False),
                                                          ('EBCDIC-INT', False),
                                                          ('IBM038', False),
                                                     ],
                                           'IBM880': [
                                                          ('cp880', False),
                                                          ('csIBM880', False),
                                                          ('EBCDIC-Cyrillic', False),
                                                          ('IBM880', False),
                                                     ],
                       'ISO-8859-9-Windows-Latin-5': [
                                                          ('csWindows31Latin5', False),
                                                          ('ISO-8859-9-Windows-Latin-5', False),
                                                     ],
                                  'ISO_8859-3:1988': [
                                                          ('csISOLatin3', False),
                                                          ('ISO-8859-3', True),
                                                          ('iso-ir-109', False),
                                                          ('ISO_8859-3', False),
                                                          ('ISO_8859-3:1988', False),
                                                          ('l3', False),
                                                          ('latin3', False),
                                                     ],
                                      'ISO-2022-CN': [
                                                          ('ISO-2022-CN', False),
                                                     ],
                                      'ISO-2022-KR': [
                                                          ('csISO2022KR', False),
                                                          ('ISO-2022-KR', True),
                                                     ],
                                   'JIS_C6226-1978': [
                                                          ('csISO42JISC62261978', False),
                                                          ('iso-ir-42', False),
                                                          ('JIS_C6226-1978', False),
                                                     ],
                                'JUS_I.B1.003-serb': [
                                                          ('csISO146Serbian', False),
                                                          ('iso-ir-146', False),
                                                          ('JUS_I.B1.003-serb', False),
                                                          ('serbian', False),
                                                     ],
                              'JIS_C6229-1984-hand': [
                                                          ('csISO94JIS62291984hand', False),
                                                          ('iso-ir-94', False),
                                                          ('JIS_C6229-1984-hand', False),
                                                          ('jp-ocr-hand', False),
                                                     ],
                                'JIS_C6220-1969-jp': [
                                                          ('csISO13JISC6220jp', False),
                                                          ('iso-ir-13', False),
                                                          ('JIS_C6220-1969', False),
                                                          ('JIS_C6220-1969-jp', False),
                                                          ('katakana', False),
                                                          ('x0201-7', False),
                                                     ],
                                'JIS_C6220-1969-ro': [
                                                          ('csISO14JISC6220ro', False),
                                                          ('iso-ir-14', False),
                                                          ('ISO646-JP', False),
                                                          ('JIS_C6220-1969-ro', False),
                                                          ('jp', False),
                                                     ],
                             'JIS_C6229-1984-b-add': [
                                                          ('csISO93JIS62291984badd', False),
                                                          ('iso-ir-93', False),
                                                          ('JIS_C6229-1984-b-add', False),
                                                          ('jp-ocr-b-add', False),
                                                     ],
                          'JIS_C6229-1984-hand-add': [
                                                          ('csISO95JIS62291984handadd', False),
                                                          ('iso-ir-95', False),
                                                          ('JIS_C6229-1984-hand-add', False),
                                                          ('jp-ocr-hand-add', False),
                                                     ],
                                        'JIS_X0201': [
                                                          ('csHalfWidthKatakana', False),
                                                          ('JIS_X0201', False),
                                                          ('X0201', False),
                                                     ],
                                   'JIS_C6226-1983': [
                                                          ('csISO87JISX0208', False),
                                                          ('iso-ir-87', False),
                                                          ('JIS_C6226-1983', False),
                                                          ('JIS_X0208-1983', False),
                                                          ('x0208', False),
                                                     ],
                              'JIS_C6229-1984-kana': [
                                                          ('csISO96JISC62291984kana', False),
                                                          ('iso-ir-96', False),
                                                          ('JIS_C6229-1984-kana', False),
                                                     ],
                                   'JIS_X0212-1990': [
                                                          ('csISO159JISX02121990', False),
                                                          ('iso-ir-159', False),
                                                          ('JIS_X0212-1990', False),
                                                          ('x0212', False),
                                                     ],
                                 'JIS_C6229-1984-a': [
                                                          ('csISO91JISC62291984a', False),
                                                          ('iso-ir-91', False),
                                                          ('JIS_C6229-1984-a', False),
                                                          ('jp-ocr-a', False),
                                                     ],
                                 'JIS_C6229-1984-b': [
                                                          ('csISO92JISC62991984b', False),
                                                          ('iso-ir-92', False),
                                                          ('ISO646-JP-OCR-B', False),
                                                          ('JIS_C6229-1984-b', False),
                                                          ('jp-ocr-b', False),
                                                     ],
                                     'JIS_Encoding': [
                                                          ('csJISEncoding', False),
                                                          ('JIS_Encoding', False),
                                                     ],
                                     'JUS_I.B1.002': [
                                                          ('csISO141JUSIB1002', False),
                                                          ('iso-ir-141', False),
                                                          ('ISO646-YU', False),
                                                          ('js', False),
                                                          ('JUS_I.B1.002', False),
                                                          ('yu', False),
                                                     ],
                                 'JUS_I.B1.003-mac': [
                                                          ('csISO147Macedonian', False),
                                                          ('iso-ir-147', False),
                                                          ('JUS_I.B1.003-mac', False),
                                                          ('macedonian', False),
                                                     ],
                                   'KS_C_5601-1987': [
                                                          ('csKSC56011987', False),
                                                          ('iso-ir-149', False),
                                                          ('korean', False),
                                                          ('KSC_5601', False),
                                                          ('KS_C_5601-1987', False),
                                                          ('KS_C_5601-1989', False),
                                                     ],
                                    'KOI7-switched': [
                                                          ('KOI7-switched', False),
                                                     ],
                                          'KZ-1048': [
                                                          ('csKZ1048', False),
                                                          ('KZ-1048', False),
                                                          ('RK1048', False),
                                                          ('STRK1048-2002', False),
                                                     ],
                                          'KSC5636': [
                                                          ('csKSC5636', False),
                                                          ('ISO646-KR', False),
                                                          ('KSC5636', False),
                                                     ],
                                           'KOI8-U': [
                                                          ('KOI8-U', False),
                                                     ],
                                           'KOI8-R': [
                                                          ('csKOI8R', False),
                                                          ('KOI8-R', True),
                                                     ],
                                    'Latin-greek-1': [
                                                          ('csISO27LatinGreek1', False),
                                                          ('iso-ir-27', False),
                                                          ('Latin-greek-1', False),
                                                     ],
                                        'latin-lap': [
                                                          ('csISO158Lap', False),
                                                          ('iso-ir-158', False),
                                                          ('lap', False),
                                                          ('latin-lap', False),
                                                     ],
                                      'latin-greek': [
                                                          ('csISO19LatinGreek', False),
                                                          ('iso-ir-19', False),
                                                          ('latin-greek', False),
                                                     ],
                                       'MSZ_7795.3': [
                                                          ('csISO86Hungarian', False),
                                                          ('hu', False),
                                                          ('iso-ir-86', False),
                                                          ('ISO646-HU', False),
                                                          ('MSZ_7795.3', False),
                                                     ],
                                        'macintosh': [
                                                          ('csMacintosh', False),
                                                          ('mac', False),
                                                          ('macintosh', False),
                                                     ],
                             'Microsoft-Publishing': [
                                                          ('csMicrosoftPublishing', False),
                                                          ('Microsoft-Publishing', False),
                                                     ],
                                             'MNEM': [
                                                          ('csMnem', False),
                                                          ('MNEM', False),
                                                     ],
                                         'MNEMONIC': [
                                                          ('csMnemonic', False),
                                                          ('MNEMONIC', False),
                                                     ],
                                    'NC_NC00-10:81': [
                                                          ('csISO151Cuba', False),
                                                          ('cuba', False),
                                                          ('iso-ir-151', False),
                                                          ('ISO646-CU', False),
                                                          ('NC_NC00-10:81', False),
                                                     ],
                                        'NS_4551-1': [
                                                          ('csISO60DanishNorwegian', False),
                                                          ('csISO60Norwegian1', False),
                                                          ('iso-ir-60', False),
                                                          ('ISO646-NO', False),
                                                          ('no', False),
                                                          ('NS_4551-1', False),
                                                     ],
                                        'NS_4551-2': [
                                                          ('csISO61Norwegian2', False),
                                                          ('iso-ir-61', False),
                                                          ('ISO646-NO2', False),
                                                          ('no2', False),
                                                          ('NS_4551-2', False),
                                                     ],
                                        'NATS-DANO': [
                                                          ('csNATSDANO', False),
                                                          ('iso-ir-9-1', False),
                                                          ('NATS-DANO', False),
                                                     ],
                                    'NATS-SEFI-ADD': [
                                                          ('csNATSSEFIADD', False),
                                                          ('iso-ir-8-2', False),
                                                          ('NATS-SEFI-ADD', False),
                                                     ],
                                      'NF_Z_62-010': [
                                                          ('csISO69French', False),
                                                          ('fr', False),
                                                          ('iso-ir-69', False),
                                                          ('ISO646-FR', False),
                                                          ('NF_Z_62-010', False),
                                                     ],
                                    'NATS-DANO-ADD': [
                                                          ('csNATSDANOADD', False),
                                                          ('iso-ir-9-2', False),
                                                          ('NATS-DANO-ADD', False),
                                                     ],
                               'NF_Z_62-010_(1973)': [
                                                          ('csISO25French', False),
                                                          ('iso-ir-25', False),
                                                          ('ISO646-FR1', False),
                                                          ('NF_Z_62-010_(1973)', False),
                                                     ],
                                        'NATS-SEFI': [
                                                          ('csNATSSEFI', False),
                                                          ('iso-ir-8-1', False),
                                                          ('NATS-SEFI', False),
                                                     ],
                               'OSD_EBCDIC_DF04_15': [
                                                          ('OSD_EBCDIC_DF04_15', False),
                                                     ],
                              'OSD_EBCDIC_DF03_IRV': [
                                                          ('OSD_EBCDIC_DF03_IRV', False),
                                                     ],
                                'OSD_EBCDIC_DF04_1': [
                                                          ('OSD_EBCDIC_DF04_1', False),
                                                     ],
                                               'PT': [
                                                          ('csISO16Portuguese', False),
                                                          ('iso-ir-16', False),
                                                          ('ISO646-PT', False),
                                                          ('PT', False),
                                                     ],
                                          'PTCP154': [
                                                          ('CP154', False),
                                                          ('csPTCP154', False),
                                                          ('Cyrillic-Asian', False),
                                                          ('PT154', False),
                                                          ('PTCP154', False),
                                                     ],
                             'PC8-Danish-Norwegian': [
                                                          ('csPC8DanishNorwegian', False),
                                                          ('PC8-Danish-Norwegian', False),
                                                     ],
                                      'PC8-Turkish': [
                                                          ('csPC8Turkish', False),
                                                          ('PC8-Turkish', False),
                                                     ],
                                              'PT2': [
                                                          ('csISO84Portuguese2', False),
                                                          ('iso-ir-84', False),
                                                          ('ISO646-PT2', False),
                                                          ('PT2', False),
                                                     ],
                                             'SCSU': [
                                                          ('SCSU', False),
                                                     ],
                                     'SEN_850200_C': [
                                                          ('csISO11SwedishForNames', False),
                                                          ('iso-ir-11', False),
                                                          ('ISO646-SE2', False),
                                                          ('se2', False),
                                                          ('SEN_850200_C', False),
                                                     ],
                                     'SEN_850200_B': [
                                                          ('csISO10Swedish', False),
                                                          ('FI', False),
                                                          ('iso-ir-10', False),
                                                          ('ISO646-FI', False),
                                                          ('ISO646-SE', False),
                                                          ('se', False),
                                                          ('SEN_850200_B', False),
                                                     ],
                                        'Shift_JIS': [
                                                          ('csShiftJIS', False),
                                                          ('MS_Kanji', False),
                                                          ('Shift_JIS', True),
                                                     ],
                                        'T.61-7bit': [
                                                          ('csISO102T617bit', False),
                                                          ('iso-ir-102', False),
                                                          ('T.61-7bit', False),
                                                     ],
                                            'TSCII': [
                                                          ('csTSCII', False),
                                                          ('TSCII', False),
                                                     ],
                                          'TIS-620': [
                                                          ('TIS-620', False),
                                                     ],
                                        'T.61-8bit': [
                                                          ('csISO103T618bit', False),
                                                          ('iso-ir-103', False),
                                                          ('T.61', False),
                                                          ('T.61-8bit', False),
                                                     ],
                                         'T.101-G2': [
                                                          ('csISO128T101G2', False),
                                                          ('iso-ir-128', False),
                                                          ('T.101-G2', False),
                                                     ],
                                'UNICODE-1-1-UTF-7': [
                                                          ('csUnicode11UTF7', False),
                                                          ('UNICODE-1-1-UTF-7', False),
                                                     ],
                                      'UNICODE-1-1': [
                                                          ('csUnicode11', False),
                                                          ('UNICODE-1-1', False),
                                                     ],
                                            'us-dk': [
                                                          ('csUSDK', False),
                                                          ('us-dk', False),
                                                     ],
                                         'UTF-32LE': [
                                                          ('UTF-32LE', False),
                                                     ],
                                         'UTF-16LE': [
                                                          ('UTF-16LE', False),
                                                     ],
                                         'UTF-32BE': [
                                                          ('UTF-32BE', False),
                                                     ],
                                         'UTF-16BE': [
                                                          ('UTF-16BE', False),
                                                     ],
                                           'UTF-16': [
                                                          ('UTF-16', False),
                                                     ],
                                     'UNKNOWN-8BIT': [
                                                          ('csUnknown8BiT', False),
                                                          ('UNKNOWN-8BIT', False),
                                                     ],
                                            'UTF-8': [
                                                          ('UTF-8', False),
                                                     ],
                                            'UTF-7': [
                                                          ('UTF-7', False),
                                                     ],
                                           'UTF-32': [
                                                          ('UTF-32', False),
                                                     ],
                                             'VIQR': [
                                                          ('csVIQR', False),
                                                          ('VIQR', False),
                                                     ],
                                       'Ventura-US': [
                                                          ('csVenturaUS', False),
                                                          ('Ventura-US', False),
                                                     ],
                                           'VISCII': [
                                                          ('csVISCII', False),
                                                          ('VISCII', False),
                                                     ],
                                   'videotex-suppl': [
                                                          ('csISO70VideotexSupp1', False),
                                                          ('iso-ir-70', False),
                                                          ('videotex-suppl', False),
                                                     ],
                            'Ventura-International': [
                                                          ('csVenturaInternational', False),
                                                          ('Ventura-International', False),
                                                     ],
                                     'Ventura-Math': [
                                                          ('csVenturaMath', False),
                                                          ('Ventura-Math', False),
                                                     ],
                                     'windows-1254': [
                                                          ('windows-1254', False),
                                                     ],
                                     'windows-1258': [
                                                          ('windows-1258', False),
                                                     ],
                                     'windows-1256': [
                                                          ('windows-1256', False),
                                                     ],
                                     'windows-1257': [
                                                          ('windows-1257', False),
                                                     ],
                                     'windows-1255': [
                                                          ('windows-1255', False),
                                                     ],
                                     'windows-1252': [
                                                          ('windows-1252', False),
                                                     ],
                                     'windows-1253': [
                                                          ('windows-1253', False),
                                                     ],
                                     'windows-1250': [
                                                          ('windows-1250', False),
                                                     ],
                                     'windows-1251': [
                                                          ('windows-1251', False),
                                                     ],
                                      'Windows-31J': [
                                                          ('csWindows31J', False),
                                                          ('Windows-31J', False),
                                                     ],
}
## NOTE: despite its name, this pattern matches every character that is
## NOT an ASCII letter or digit.
alphanumRE = re.compile( '[^A-Za-z0-9]')
#######################################################
def normalizeEncodingName( s):
    """Return the canonical lookup key for the encoding name s.

    Every character other than an ASCII letter or digit is removed and
    what remains is upper-cased, so spellings such as 'utf-8', 'UTF_8'
    and 'Utf8' all yield the same key, 'UTF8'.
    """
    strippedName = alphanumRE.sub( '', s)
    return strippedName.upper()

## Matches code-page style names such as 'cp437' or 'CP1252' (any case).
codePageRE = re.compile( '^CP[0-9]+$', re.I)
#######################################################
def selectPreferred( tupleList):
    """Return the preferred encoding name from a list of ( name, flag) tuples.

    The criteria below are tried in order; the name of the first tuple (in
    list order) that satisfies the first criterion satisfied by any tuple
    is returned:

      1. the flag equals True
      2. the name starts with 'UTF-' (case-insensitive)
      3. the name starts with 'ISO-' (case-insensitive)
      4. the name starts with 'GB'   (case-insensitive)
      5. the name looks like a code page ('cp' followed by digits)
      6. the flag equals False

    If no tuple satisfies any criterion (e.g. every flag is None and no
    name has a recognized shape), the name of the first tuple is returned.
    An empty tupleList raises IndexError.
    """
    def nameStartsWith( prefix):
        ## Build a predicate testing the upper-cased name against prefix.
        return lambda candidate: candidate[ 0].upper().startswith( prefix)

    criteriaInPriorityOrder = (
        lambda candidate: candidate[ 1] == True,
        nameStartsWith( 'UTF-'),
        nameStartsWith( 'ISO-'),
        nameStartsWith( 'GB'),
        lambda candidate: codePageRE.match( candidate[ 0]),  ## 'cp0000'
        lambda candidate: candidate[ 1] == False,
    )

    for criterion in criteriaInPriorityOrder:
        for candidate in tupleList:
            if criterion( candidate):
                return candidate[ 0]

    ## Nothing matched: fall back on whatever is listed first.
    return tupleList[ 0][ 0]

#######################################################
def takeCensusOfEncodings():
    """Build the module-level encoding lookup tables.

    Reads the global IANAEncodingsDict (encoding name -> list of
    ( aliasName, flag) tuples) together with Python's own
    encodings.aliases.aliases table, and sets two globals:

      encodingsLookupDict   normalized name (see normalizeEncodingName)
                            -> preferred name, as chosen by selectPreferred.
      encodingsAliasesDict  preferred name -> sorted list of the other
                            names that resolve to it.

    Only names that Python itself can resolve to a codec are retained.
    Tuples taken from IANAEncodingsDict carry a True/False flag (True
    presumably marks IANA's "preferred MIME name" -- confirm against the
    table's source); tuples taken from Python's alias table carry None.

    No value is returned.  On an internal inconsistency while removing
    redundant tuples the function reports through errMsg and exits the
    process with status 1.
    """
    global IANAEncodingsDict, encodingsLookupDict, encodingsAliasesDict

    supportedIANAEncodings = {}

    ## Phase 1: register every IANA alias list under the normalized form
    ## of each of its names that Python can actually encode with.
    keys = sorted( IANAEncodingsDict.keys())
    for key in keys:
        for thisTuple in IANAEncodingsDict[ key]:
            try:
                'asdf'.encode( thisTuple[ 0])
            except LookupError:
                continue
            normKey = normalizeEncodingName( thisTuple[ 0])
            if normKey in supportedIANAEncodings:
                supportedIANAEncodings[ normKey].extend( IANAEncodingsDict[ key])  ## extend existing (newly copied) list
            else:
                supportedIANAEncodings[ normKey] = IANAEncodingsDict[ key][ :]  ## new copy of list

    ## Phase 2: add Python's own alias table.  Both the alias (k) and its
    ## target (v) are recorded, flagged None, under both normalized names.
    for k in encodings.aliases.aliases:
        v = encodings.aliases.aliases[ k]
        try:
            'asdf'.encode( v)
        except LookupError:
            continue
        try:
            'asdf'.encode( k)
        except LookupError:
            continue

        normKeyK = normalizeEncodingName( k)
        if normKeyK in supportedIANAEncodings:
            supportedIANAEncodings[ normKeyK].append( ( k, None))
            supportedIANAEncodings[ normKeyK].append( ( v, None))
        else:
            supportedIANAEncodings[ normKeyK] = [ ( k, None), ( v, None)]

        normKeyV = normalizeEncodingName( v)
        if normKeyV in supportedIANAEncodings:
            supportedIANAEncodings[ normKeyV].append( ( k, None))
            supportedIANAEncodings[ normKeyV].append( ( v, None))
        else:
            supportedIANAEncodings[ normKeyV] = [ ( k, None), ( v, None)]

    ## merge: propagate every tuple of a list to the entry of every name
    ## in that list, repeating until a full pass changes nothing.
    while True:
        madeChange = False
        sIEKeys = sorted( supportedIANAEncodings.keys())
        for sIEKey in sIEKeys:
            ecTupleList = supportedIANAEncodings[ sIEKey]
            for ecTuple1 in ecTupleList:
                normName = normalizeEncodingName( ecTuple1[ 0])
                for ecTuple2 in ecTupleList:
                    if normName in supportedIANAEncodings:
                        if ecTuple2 not in supportedIANAEncodings[ normName]:
                            supportedIANAEncodings[ normName].append( ecTuple2)
                            madeChange = True
                    else:
                        supportedIANAEncodings[ normName] = ecTupleList[:]
                        madeChange = True
        if not madeChange: break

    ## remove redundancies: within each list keep one tuple per name
    ## (compared case-insensitively, and treating '_' and '-' as equal).
    ## Deleting while scanning can skip elements, so whole passes are
    ## repeated until one makes no change.
    while True:
        madeChange = False
        sIEKeys = sorted( supportedIANAEncodings.keys())
        for sIEKey in sIEKeys:
            tupleCtr1 = 0
            ecTupleList = supportedIANAEncodings[ sIEKey]
            while tupleCtr1 < len( ecTupleList):
                tuple1 = ecTupleList[ tupleCtr1]
                tupleCtr2 = tupleCtr1 + 1
                while tupleCtr2 < len( ecTupleList):
                    tuple2 = ecTupleList[ tupleCtr2]
                    if tuple1[ 0].upper() == tuple2[ 0].upper():  ## they are essentially the same and one of them should be eliminated
                        ## Prefer a flagged (IANA) tuple over an unflagged
                        ## (Python-only) one, and True over False.
                        if tuple1[ 1] is None:
                            del ecTupleList[ tupleCtr1]
                            madeChange = True
                        elif tuple2[ 1] is None:
                            del ecTupleList[ tupleCtr2]
                            madeChange = True
                        elif tuple1[ 1] == True and tuple2[ 1] == False:
                            del ecTupleList[ tupleCtr2]
                            madeChange = True
                        elif tuple1[ 1] == False and tuple2[ 1] == True:
                            del ecTupleList[ tupleCtr1]
                            madeChange = True
                        elif tuple1[ 1] == tuple2[ 1]:
                            del ecTupleList[ tupleCtr2]
                            madeChange = True
                        else:
                            errMsg( 'internal error: unanticipated situation; tuple1 = "%s", tuple2 = "%s"' % ( tuple1, tuple2))
                            if tokenTweakDebug:
                                import pdb
                                pdb.set_trace()
                            sys.exit( 1)
                    elif tuple1[ 0].upper().replace( '_', '-') == tuple2[ 0].upper().replace( '_', '-'):  ## they are the same except for underscore/dash
                        ## Keep the True-flagged spelling if there is one;
                        ## otherwise keep the spelling without underscores.
                        if tuple1[ 1] == True:
                            del ecTupleList[ tupleCtr2]
                            madeChange = True
                        elif tuple2[ 1] == True:
                            del ecTupleList[ tupleCtr1]
                            madeChange = True
                        elif '_' not in tuple1[ 0]:
                            del ecTupleList[ tupleCtr2]
                            madeChange = True
                        else:
                            del ecTupleList[ tupleCtr1]
                            madeChange = True

                    tupleCtr2 += 1
                    if not ( tupleCtr1 < len( ecTupleList)): break
                tupleCtr1 += 1
        if not madeChange: break

    ## remove entries with only 'None' (only source was Python encodings module; these include codecs that are not actually charset encodings)
    sIEKeys = sorted( supportedIANAEncodings.keys())
    for sIEKey in sIEKeys:
        ecTupleList = supportedIANAEncodings[ sIEKey]
        foundNonNone = False
        for ecTuple in ecTupleList:
            if ecTuple[ 1] is not None:
                foundNonNone = True
        if not foundNonNone:
            del supportedIANAEncodings[ sIEKey]

    ## put back any that were deleted wrongly in that they are still referenced elsewhere in the dictionary
    ## (the restored key shares the referencing entry's list object)
    sIEKeys = sorted( supportedIANAEncodings.keys())
    for sIEKey in sIEKeys:
        ecTupleList = supportedIANAEncodings[ sIEKey]
        for ecTuple in ecTupleList:
            normName = normalizeEncodingName( ecTuple[ 0])
            if normName not in supportedIANAEncodings:
                supportedIANAEncodings[ normName] = ecTupleList

    ## torture test: drop every name the codec registry cannot resolve.
    ## This used to open a scratch file in the current directory with
    ## codecs.open() for each name and unlink it afterwards, which failed
    ## in a read-only directory and when no file had ever been created.
    ## codecs.lookup() raises the same LookupError for an unknown name
    ## without touching the file system.
    sIEKeys = sorted( supportedIANAEncodings.keys())
    for sIEKey in sIEKeys:
        ecTupleList = supportedIANAEncodings[ sIEKey]

        tupleCtr = 0
        while tupleCtr < len( ecTupleList):
            try:
                codecs.lookup( ecTupleList[ tupleCtr][ 0])
            except LookupError:
                del ecTupleList[ tupleCtr]
                continue
            tupleCtr += 1

    ## discard entries whose lists the torture test emptied
    sIEKeys = sorted( supportedIANAEncodings.keys())
    for sIEKey in sIEKeys:
        ecTupleList = supportedIANAEncodings[ sIEKey]
        if len( ecTupleList) == 0:
            del supportedIANAEncodings[ sIEKey]


    ## create encodingsLookupDict, which lists only the preferred encoding name
    encodingsLookupDict = supportedIANAEncodings.copy()
    sIEKeys = sorted( encodingsLookupDict.keys())
    for sIEKey in sIEKeys:
        encodingsLookupDict[ sIEKey] = selectPreferred( encodingsLookupDict[ sIEKey])

    ## create the encodings aliases dictionary from what's left.  This is shown to the user when
    ## the -encodings option is specified.
    encodingsAliasesDict = {}
    sIEKeys = sorted( supportedIANAEncodings.keys())
    for sIEKey in sIEKeys:
        ecTupleList = supportedIANAEncodings[ sIEKey]
        for ecTuple in ecTupleList:
            normName = normalizeEncodingName( ecTuple[ 0])
            preferredName = encodingsLookupDict[ normName]
            if preferredName in encodingsAliasesDict:
                if ecTuple[ 0] not in encodingsAliasesDict[ preferredName] and ecTuple[ 0] != preferredName:
                    encodingsAliasesDict[ preferredName].append( ecTuple[ 0])
            else:
                if ecTuple[ 0] != preferredName:
                    encodingsAliasesDict[ preferredName] = [ ecTuple[ 0]]
    for encodingsAliasesDictKey in encodingsAliasesDict.keys():
        encodingsAliasesDict[ encodingsAliasesDictKey] = sorted( encodingsAliasesDict[ encodingsAliasesDictKey])

#######################################################
def doShowEncodings():
    """Write the table of available encoding names to the StarErrors stream.

    Emits, via writeOutput() on starErrorsStreamFO, an explanatory header
    naming the default encoding, followed by one line per key of the
    global encodingsAliasesDict: the key left-justified in a fixed-width
    column, then its aliases separated by ', '.  Keys and aliases are
    both ordered case-insensitively.  Nothing is returned.
    """
    global defaultEncoding, encodingsAliasesDict

    writeOutput( starErrorsStreamFO,
                 """
Below are the names of available encodings for use with the
-inputEncoding (input encoding) and -outputEncoding (output encoding)
options.  When multiple names appear on the same line, they
are all aliases for the same codec (COder/DECoder).  If one
of the aliases is the "preferred MIME name" it is the one in
the left column.  (For several codecs there is no "preferred
MIME name".)

The default encoding for both input and output is "%s".

""" % ( defaultEncoding),
        '-StarErrors',
    )

    ## IANA names are the ones to use because they are presumably recognized
    ## by XML processors as the value of the "encoding" parameter of the
    ## XML declaration.

    encodingsAliasesKeys = sorted( encodingsAliasesDict.keys(), key=str.upper)

    ## The left column is as wide as the longest key plus two characters
    ## (the '+ [ 0]' keeps max() valid when there are no keys at all).
    length = max( [ len( encodingsAliasesKey) for encodingsAliasesKey in encodingsAliasesKeys] + [ 0])

    controlString = '%%-%d.%ds %%s\n' % ( length + 2, length + 2)
    for encodingsAliasesKey in encodingsAliasesKeys:
        ## sorted() returns a new list; the result has to be kept, otherwise
        ## the case-insensitive ordering is silently lost.
        aliasList = sorted( encodingsAliasesDict[ encodingsAliasesKey], key=str.upper)
        writeOutput(
            starErrorsStreamFO,
            controlString % ( encodingsAliasesKey, ', '.join( aliasList)),
            '-StarErrors',
        )

################################
### ENCODING STUFF ENDS HERE ###
################################



## Prefix the DTD text accumulated so far in dtdSourceString with an XML
## comment that identifies the DTD and carries the parser version.
dtdSourceString = (
    '<!-- XML-based IEML Syntax Document Type Definition (DTD)\n'
    '     version %s -->\n'
    '%s'
) % ( STARPARSER_VERSION, dtdSourceString)

## "About" text for the parser.  The three %s slots are filled, in order,
## with cvsDateVector[ 0] (used as the copyright date), the parser version,
## and the license notice.
STARPARSER_ABOUT = """
This "starparser.py" IEML Expression Parser
converts IEML "Star Language" expressions to XML
and/or JSON.
Copyright %s Pierre Levy.  Version %s.

%s
""" % (
    cvsDateVector[ 0],
    STARPARSER_VERSION,
    LICENSE_NOTICE,
)

STARPARSER_RELEASENOTES = """

starparser.py RELEASE NOTES in chronological order:

Version 1 delivered 2007-11-27.

Version 2 delivered 2008-10-30.

  USL support added; slashes (/) separate category
  expressions within USLs.

  DTD altered accordingly:

    <ieml>s now contain <category>s.

    <ieml> now has "exprType" attribute; values are
      "single-category" and "USL".  If an <ieml> contains
      no slashes (not counting comment delimiters), it is a
      single-category expression.  If an <ieml> contains
      any slashes (not counting comment delimiters), it
      expresses a USL, even if it contains only one
      category.  The slash(es) can appear before and/or
      after the category expression(s), as well as between
      them.

    "Translator" role re-named "medium".

    Roles re-ordered to: (1) "source" (2) "medium" (3)
    "destination".

    The "fillWith" feature now handles all layers the same
        way.  (In Version 1, the 3rd role was always filled
        with Empty at Layers 0 and 1.)

    Layers re-numbered so that the primitive layer is Layer
    0.

    Layer names changed as follows:
       "primitive" changed to "L0"
           "event" changed to "L1"
        "relation" changed to "L2"
            "idea" changed to "L3"
          "phrase" changed to "L4"
            "seme" changed to "L5"

    Added #IMPLIED "tokenDisp" attribute to many XML
    element types, to aid in debugging STAR expressions.

    Changed "first" and "last" attributes from #REQUIRED to
    #IMPLIED.

  2008-11-20: Fixed bug in error message about missing
  parens around groups.

Version 3 delivered 2008-11-22.

  Added support for the +, #, and @ (sequence_union,
  sequence_difference, sequence intersection) operators,
  and added corresponding element types to the DTD.
  Changed the names of the existing union, difference, and
  intersection operators and corresponding XML element
  types to primitive_union, primitive_difference, and
  primitive_intersection.

Version 4 delivered 2008-12-15.

  1. Two changes in comment and instantiator syntax:

     a. The constraint on the syntactic positions of
        comments and instantiators has been relaxed.  In
        previous versions, comments and instantiators could
        only appear just before a layer mark, and were
        always associated with the sequence whose terminus
        is that following layermark.  This current version
        supports the same positioning semantics as before,
        but it no longer rejects comments and instantiators
        that do not appear just before a layer mark.
        Instead, the parser adds them to the ordered list
        of comments and instantiators that are regarded as
        being associated with the entire slash-delimited
        category expression in which they appear.  This new
        feature permits the association of comments and
        instantiators with a slash-delimited category
        expression by inserting them before and/or after
        the expression (but still within the string
        delimited by slashes and/or the beginning or end of
        the entire STAR expression), or even inside the
        category expression (but not just before a
        layermark, unless it's the category expression's
        last layermark), rather than only just before its
        last layermark.

     b. Comment delimiters are no longer /$ ... $/.  Now
        they are $? ... ?$.  This is less confusing because
        slashes are now used as category expression
        delimiters in complex expressions.

  2.  Many character encodings are now supported for both
      input of STAR expressions and output of STAR-XML;
      formerly only ascii was supported.  The new
      -inputEncoding and -outputEncoding options allow these
      encodings to be specified.  The default encoding for
      both input and output is %s.  The -encodings
      option can be used to display the available
      encodings.  As a practical matter, this change
      affects only comments and instantiators; these can
      now be written in any language, including but not
      limited to the CJK languages.

  3.  The values of the tokenDisp and
      commentsAndInstantiators attributes are now encoded
      with so-called "backslash escapes".  Formerly, all
      nondisplayable characters were shown as space
      characters.  Now, these characters are displayed in
      the conventional fashion as in C, Python, and Java
      programs, i.e., as special codes with leading
      backslashes.  For example, end-of-line characters are
      shown as '\\n', tabs as '\\t', and backslashes as
      '\\\\'.  All other nondisplayable characters are
      shown as a backslash followed by three octal digits
      indicating the character's ordinal value; for
      example, the character whose ordinal value is 8
      appears as '\\010'.

  4.  Record-ends (characters that appear at the right-hand
      ends of lines of text) are internally normalized, so
      that in the XML output, given equivalent inputs, the
      values of expressionString and tokenDisp attributes,
      and the contents of <comment> and <instantiator>
      elements, will be identical regardless of whether the
      host is Linux, Apple, or Microsoft.  Internally, the
      Linux convention is used, i.e., record-ends are
      characters whose ordinal value is 10, also sometimes
      called "linefeed" characters.  (Apple typically uses
      13, sometimes called the "carriage return" character,
      and Microsoft typically uses two characters, a 13
      followed by a 10.)


Version 5 delivered 2008-12-21.

  1. The commentsAndInstantiators attribute no longer
     exists.  Instead, <ieml>, <category>, <genOp>, and
     <instantiator> elements may contain <comment>
     elements.  When an XML element contains a <comment>,
     the text contained in the <comment> is meant to be a
     remark about the USL, category, generative operand, or
     instantiator that corresponds to the containing
     element.  Similarly, <ieml>, <category> and <genOp>
     elements may now contain <instantiator> elements.  The
     text content of each such element identifies an
     instance of the class identified by the containing
     element.  The STAR syntax supported by the new version
     has been enhanced with simple rules for using the
     syntactic context of a comment or instantiator to
     specify whether it is about a whole USL, or a single
     category, or a roleplayer within a category.  The
     rules are as follows:

     (a) As in earlier versions of Starparser.py, when one
         or more comments $?...?$ and/or instantiators
         [...] is/are followed by a layermark, then they
         are all about the category whose rightmost
         character is that layermark.

     (b) In earlier versions of Starparser.py, comments and
         instantiators whose syntactic contexts did not
         conform to (a), above, were rejected.  Now,
         however, they are regarded as remarking about (or
         instantiating) the category or USL with which they
         share the same syntactic context.  This change
         brings two benefits:

         (i) It is now possible to specify comments about
             categories without having to embed the
             comments inside the category expressions, just
             before their final characters.

             Example: These two expressions are now
             equivalent:

             (a)    * / M : O :    $? comment ?$ . /  **
             (b)    * / M : O : .  $? comment ?$   /  **

             (Note that the L1 layermark (.) is in two
             different positions in the above example.)

        (ii) It is now possible to specify comments about
             entire USLs; this was not possible before.
             Such comments are placed between slash
             delimiters, with no category expressions
             between the same pair of slashes.

             *
             / M:O:. $? comment 1 ?$
             / I:.-'_
             / $? comment 2 ?$
             **

             In the above example, the comment "comment 2"
             is regarded as a comment on the USL specified
             by the entire STAR expression.

             In the XML output from the above example, the
             text "comment 2" appears as the content of a
             <comment> element that is a child of
             (contained) the <ieml> element.

     (c) When a comment appears inside an instantiator, it
         is regarded as a comment about the specified
         instantiation.

         * 
         / M : O : . [ instantiator text $? my comment ?$ ]
         **

         In the XML output, such comments appear as <comment>
         elements contained in <instantiator> elements:

         ...  <instantiator> instantiator text <comment> my
         comment </comment> </instantiator> ...

  2. The readability of the -tokens, -raw, and -o
     ("cooked") outputs has been improved in several ways.
     Perhaps the most significant readability improvement
     is in the layout of "tokenDisp" attribute values.
     Linebreaks that appear in the input Star expression
     are now honored as linebreaks in the values of these
     attributes, and the entire tokenDisp value is shown
     indented to wherever its containing element's other
     attribute values are indented.  This makes the layout
     of the output significantly easier to scan visually.
     (Previous versions of Starparser.py always showed
     these values left-justified, regardless of the
     indention levels of their containing elements.)

  3. A new '-firstLast' invocation option now allows
     control of whether the 'first' and 'last' attributes
     will be output.  These attributes are useful to
     software applications that support the interactive
     development of IEML expressions, but they are not very
     helpful to human beings who may read the STAR-XML
     output of Starparser.py.  By default, output of these
     attributes is now suppressed.  When Starparser's
     outputs will be read by interactive software
     applications, such applications may invoke Starparser
     with '-firstLast True'.

  4. A new '-tokenDisp' invocation option now allows
     control of whether the 'tokenDisp' attributes will be
     output.  These attributes are useful to human beings
     who may read the STAR-XML outputs, but they are very
     bulky, and they have little if any value as inputs to
     software applications.  By default, tokenDisp
     attributes are output.  When Starparser's output will
     be read only by software applications, it may be
     invoked with '-tokenDisp False'.

  5. Bugs have been fixed in the -inputEncoding, -outputEncoding,
     -encodings options.  If there is a "preferred MIME
     name" for an encoding, it appears in the left column
     of the -encodings display; the name in the left column
     will be made to appear as the value of the "encoding"
     parameter in the XML declaration at the beginnings of
     XML output files, regardless of whether that name or
     an alias for that name was used as the value of the
     -inputEncoding or -outputEncoding option.  Internally, UTF-8 is
     still used for all XML parsing.  

Version 6 delivered 2009-04-10

This is a complete rewrite, based on a new approach, and an
improvement of understanding of how STAR expressions are most
conveniently processed, given the demands of binary processing, and
the expected demands of the new folding language (TBD).

* Binary values might as well be calculated by the parser, since this
  is a normal part of understanding a Star expression.  The -binary
  option controls whether the parser will do this work.  

  The binary value of a seme or complex is represented as a set of
  "binsc"s.  "binsc" means "a binary straight category represented as
  an array of 8-bit integers".  In XML and JSON outputs, each binsc is
  represented as a string of hexadecimal digits.

  The parser calculates the binary results of operations whose names
  begin with 'primitive...'.  However, this version of the parser does
  not calculate the binary values of 'sequenceUnionAtL...'
  'sequenceIntersectionAtL...'  etc. operations (operations whose
  names begin with 'sequence...').  Nor does it calculate diagonals.
  In this version, binary values that depend on such operations are
  reported as "None".

* Containers need to be represented, but usually also factored out of
  the hierarchy for convenience in processing the output of the
  parser.

* Whitespace is now preserved in the outputs, so that high-fidelity
  round-tripping of Star expressions through various applications is
  now feasible.

* The XML DTD has been changed in order to allow element type names to
  be layer-specific, as opposed to having fewer, more generic element
  types used in combination with a layerNumber attribute to
  distinguish them by layer.  The DTD has also been expressed much
  more compactly by means of XML parameter entity declarations.  The
  DTD can be examined in its compact form (via the -dtdWithPEs option)
  or in its fully expanded form (-dtd).  If the -includeDTD option is
  True, the expanded form will be inserted into the XML output of the
  parser.

  As a convenience for anyone who needs to include the DTD in an HTML
  document, a preformatted HTML-ready version is available via
  -dtdonlyforhtml.

* A JSON output stream option has been added.

* A Python "pickle" output stream option has been added.

* Terminology that is more current with respect to Prof. Levy's
  research is used throughout the code and the DTD.  "Seme" is now
  used instead of "roleplayer".

* Because the parser is becoming a Web service, distinctions have been
  introduced in its output streams so they can be piped as desired by
  web service operators.  

* Implicit semes are optionally made explicit, and when they are, the
  added semes have an 'implicit' attribute whose value is True.

* Internally, XML validation is now done via the Python 'lxml'
  library, which makes the whole parser more easily portable.  There
  is a new internal 'helper' application, starprettyvalid.py, that is
  called by the parser for XML validation and XML pretty-printing, if
  requested.  The command line used by the parser to invoke the helper
  can be inspected via the -showHelperCommand option.  The version of
  the XML output that is actually used for validation can be captured
  via the -xmlAsParsed option.  Both of these options are for
  debugging the parser system.

* There are now several new '...first' and '...last' attributes for
  use by applications that need to know where are 
  (1) the operators of nongenerative operations (opFirst, opLast), 
  (2) the parameterIdentifiers of containers (piFirst, piLast),
  (3) the visible symbols of semes in Star expressions, when such
      symbols exist (symFirst, symLast)

* Special outputs for debugging the parser are available.  The -tokens
  output was already present in previous versions of the parser.  An
  additional, extremely verbose output is now also available via
  -allTokens.  The latter includes tokens that have been discarded
  during processing.

Version 7 delivered 2009-08-04

* Added layer 6.  (There are now 7 layers, counting layer 0.)  The 
  layer 6 layermark is ';' (semicolon).  A binscat at layer 6 has
  729 characters (3**6).

* Added support for non-decimal parameter identifiers.  Formerly, a
  parameter identifier could only be a string of decimal digits, e.g. 907123 .
  That's still true, but now it's *also* possible for a parameter identifier
  to be any doublequote-delimited string of alphanumerics, plus any of
  the following characters:

        . : _ -

  (Note that no whitespace characters are allowed in parameter identifiers,
  even if they are delimited by doublequotes.)

  So, all of the following examples are valid:

  Syntactic container is implicitly the whole category expression:
  / A: 0828
  / A: "0828"
  / A: "hello_there_Daddy-O"

  Syntactic container is explicitly a group beginning with ( and ending with ):
  / ( A: 0828)
  / ( A: "0828")
  / ( A: "hello_there_Daddy-O")

  Syntactic container is explicitly an undeterminedSubsetOf beginning with < and ending with >:
  / < A: 0828>
  / < A: "0828">
  / < A: "hello_there_Daddy-O">

  Syntactic container is explicitly a diagonal beginning with { and ending with }:
  / { A: 0828}
  / { A: "0828"}
  / { A: "hello_there_Daddy-O"}

* Bug fixes:
  - UndeterminedSubsetOfs cannot any longer have binary values (they register as "None")
  - the -binary option cannot be True if the -collapseContainers option is False.
  

Version 7.3 delivered 2010-05-28

* New features:
  - Added a -debug invocation parameter that aids in debugging the parser program.

* Bug fixes:
  - In USLs, some category expressions were not being recognized as such, but now
    they are.
  - Set MAXUNICODEVALUE to 0xFFFF (16-bit characters).  Starparser.py used to use
    sys.maxunicode as MAXUNICODEVALUE, but now sys.maxunicode appears to indicate
    that 32-bit characters are supported, and such high resolution is not (yet)
    needed in starparser.py.
  - The record-starts/record-ends in the values of "binary" attributes were always
    DOS-style.  Now they conform to the prevailing convention.

Version 7.5 delivered 2010-07-01

* Fixed minor bug in binary value calculator.

""" % (
            defaultEncoding,
        )


########################################
### BOOTSTRAP STUFF FROM HERE TO END ###
########################################
if __name__ == '__main__':
    ## Entry point.  This single file backs two command-line programs; the
    ## name it was invoked under (sys.argv[0]) selects which one runs.
    import sys

    ## Guard clause: reject any invocation name that matches neither program.
    if not sys.argv[0].endswith(('starparser.py', 'xml2star.py')):
        sys.stderr.write('unrecognized program invocation: %s\n' % sys.argv[0])
        sys.exit(1)

    if sys.argv[0].endswith('starparser.py'):
        starparser()
    else:
        ## Invoked as xml2star.py.  The module-level names below are (re)bound
        ## before xml2star() is called -- presumably xml2star() reads them;
        ## confirm against its definition.
        XML2STAR_VERSION = 'v%s %s' % (
            xml2star_cvsRevToSoftwareRev(cvsRevision),
            cvsDate.split()[1],
        )

        ## Prefix the existing DTD text with a version-stamped XML comment.
        ## The leading blanks inside the literals (8 on the first line, 13 on
        ## the second) are part of the emitted text.
        dtdString = (
            '        <!-- XML-based IEML Syntax Document Type Definition (DTD)\n'
            '             version %s -->\n'
            '%s'
        ) % (XML2STAR_VERSION, dtdString)

        ## Banner text: copyright year, version, license, and release notes.
        XML2STAR_NOTICE = """
This "xml2star.py" IEML Expression Converter
converts STAR-XML representations of IEML expressions
into IEML "Star" Language expressions.
Copyright %s Pierre Levy.  Version %s.

%s

RELEASE NOTES in chronological order:

Version 1 delivered 2009-02-11.

""" % (cvsDateVector[0], XML2STAR_VERSION, LICENSE_NOTICE)

        xml2star()

