/**********************************************************************
 *
 * Project:  CPL - Common Portability Library
 * Purpose:  Implementation of MiniXML Parser and handling.
 * Author:   Frank Warmerdam, warmerdam@pobox.com
 *
 **********************************************************************
 * Copyright (c) 2001, Frank Warmerdam
 * Copyright (c) 2007-2013, Even Rouault <even dot rouault at spatialys.com>
 *
 * SPDX-License-Identifier: MIT
 **********************************************************************
 *
 * Independent Security Audit 2003/04/05 Andrey Kiselev:
 *   Completed audit of this module. Any documents may be parsed without
 *   buffer overflows and stack corruptions.
 *
 * Security Audit 2003/03/28 warmerda:
 *   Completed security audit.  I believe that this module may be safely used
 *   to parse, and serialize arbitrary documents provided by a potentially
 *   hostile source.
 *
 */

#include "cpl_minixml.h"

#include <cctype>
#include <climits>
#include <cstddef>
#include <cstdio>
#include <cstring>

#include <algorithm>

#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_string.h"
#include "cpl_vsi.h"

/** Kinds of token produced by the tokenizer (ReadToken()). */
enum XMLTokenType
{
    TNone,          /**< No token: end of input, or tokenizing failed. */
    TString,        /**< Quoted value, CDATA content or text outside a tag. */
    TOpen,          /**< '<' starting a tag. */
    TClose,         /**< '>' ending a tag. */
    TEqual,         /**< '=' inside a tag. */
    TToken,         /**< Name-like token inside a tag. */
    TSlashClose,    /**< "/>" terminator. */
    TQuestionClose, /**< "?>" terminator. */
    TComment,       /**< Text found between "<!--" and "-->". */
    TLiteral        /**< Raw text of a "<!DOCTYPE ...>" declaration. */
};

/** One entry of the stack of currently open elements. */
struct StackContext
{
    /** The open element this entry stands for. */
    CPLXMLNode *psFirstNode;

    /** Most recently attached child of that element, or nullptr if none
     *  has been attached through AttachNode() yet. */
    CPLXMLNode *psLastChild;
};

/** Complete state of one parsing run: input cursor, current token, stack
 *  of open elements and the tree built so far. */
struct ParseContext
{
    /* --- Input cursor --- */
    const char *pszInput; /**< NUL-terminated text being parsed. */
    int nInputOffset;     /**< Index in pszInput of the next char to read. */
    int nInputLine;       /**< Line feeds consumed so far (for messages). */
    bool bInElement;      /**< True while between '<' and its terminator. */

    /* --- Current token --- */
    XMLTokenType eTokenType; /**< Type of the last token read. */
    char *pszToken;          /**< NUL-terminated text of the last token. */
    size_t nTokenMaxSize;    /**< Bytes allocated for pszToken. */
    size_t nTokenSize;       /**< Characters stored in pszToken. */

    /* --- Stack of open elements --- */
    int nStackMaxSize;       /**< Entries allocated in papsStack. */
    int nStackSize;          /**< Entries of papsStack in use. */
    StackContext *papsStack; /**< Array of stack entries, top is last. */

    /* --- Result tree --- */
    CPLXMLNode *psFirstNode; /**< First top-level node. */
    CPLXMLNode *psLastNode;  /**< Last top-level node. */
};

// Forward declaration of a file-local node creation helper; its definition
// is not part of this section of the file.  The parser below tests the
// returned pointer against null before using it, so it presumably returns
// nullptr on failure -- confirm at the definition.
static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                                     const char *pszText);

/************************************************************************/
/*                              ReadChar()                              */
/************************************************************************/

/**
 * Return the next input character and step over it.
 *
 * The terminating NUL is returned but never stepped over, so calling this
 * repeatedly at end of input keeps returning '\0'.  Each line feed consumed
 * increments the line counter.
 */
static CPL_INLINE char ReadChar(ParseContext *psContext)
{
    const char chCurrent = psContext->pszInput[psContext->nInputOffset];

    // End of input: leave the cursor on the terminator.
    if (chCurrent == '\0')
        return chCurrent;

    psContext->nInputOffset++;

    // 10 is the line feed character.
    if (chCurrent == 10)
        psContext->nInputLine++;

    return chCurrent;
}

/************************************************************************/
/*                             UnreadChar()                             */
/************************************************************************/

/**
 * Push back the character most recently returned by ReadChar().
 *
 * Passing '\0' is a no-op, matching the fact that ReadChar() does not move
 * the cursor when it returns the terminator.  Pushing back a line feed also
 * restores the line counter.
 */
static CPL_INLINE void UnreadChar(ParseContext *psContext, char chToUnread)
{
    if (chToUnread != '\0')
    {
        // The character handed back must be the one just consumed.
        CPLAssert(chToUnread ==
                  psContext->pszInput[psContext->nInputOffset - 1]);

        psContext->nInputOffset--;

        // 10 is the line feed character.
        if (chToUnread == 10)
            psContext->nInputLine--;
    }
}

/************************************************************************/
/*                           ReallocToken()                             */
/************************************************************************/

/**
 * Double the capacity of the token buffer.
 *
 * @return true when the buffer was enlarged.  On failure (capacity already
 * above INT_MAX / 2, or reallocation failed) an out-of-memory error is
 * reported, the buffer is freed, pszToken is set to nullptr and false is
 * returned.
 */
static bool ReallocToken(ParseContext *psContext)
{
    char *pszEnlarged = nullptr;

    if (psContext->nTokenMaxSize > INT_MAX / 2)
    {
        // Doubling would go beyond what is accepted: refuse to grow.
        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Out of memory allocating %d*2 bytes",
                 static_cast<int>(psContext->nTokenMaxSize));
    }
    else
    {
        psContext->nTokenMaxSize *= 2;
        pszEnlarged = static_cast<char *>(
            VSIRealloc(psContext->pszToken, psContext->nTokenMaxSize));
        if (pszEnlarged == nullptr)
        {
            CPLError(CE_Failure, CPLE_OutOfMemory,
                     "Out of memory allocating %d bytes",
                     static_cast<int>(psContext->nTokenMaxSize));
        }
    }

    // Single failure exit: the old buffer is still ours to release.
    if (pszEnlarged == nullptr)
    {
        VSIFree(psContext->pszToken);
        psContext->pszToken = nullptr;
        return false;
    }

    psContext->pszToken = pszEnlarged;
    return true;
}

/************************************************************************/
/*                             AddToToken()                             */
/************************************************************************/

/**
 * Append one character to the current token, keeping it NUL-terminated.
 *
 * The buffer is enlarged through ReallocToken() when fewer than two spare
 * bytes would remain.
 *
 * @return false if the buffer could not be enlarged, true otherwise.
 */
static CPL_INLINE bool _AddToToken(ParseContext *psContext, char chNewChar)
{
    const bool bHasRoom =
        psContext->nTokenSize < psContext->nTokenMaxSize - 2;
    if (!bHasRoom && !ReallocToken(psContext))
        return false;

    char *pszEnd = psContext->pszToken + psContext->nTokenSize;
    pszEnd[0] = chNewChar;
    pszEnd[1] = '\0';
    psContext->nTokenSize++;
    return true;
}

// Convenience wrapper around _AddToToken(): appends chNewChar to the token
// and jumps to a label named "fail" when the append does not succeed.
// It can therefore only be used inside a function that defines such a label.
// Note that the expansion is a bare "if" statement that already carries its
// own terminating semicolon.
// TODO(schwehr): Remove the goto.
#define AddToToken(psContext, chNewChar)                                       \
    if (!_AddToToken(psContext, chNewChar))                                    \
        goto fail;

/************************************************************************/
/*                             ReadToken()                              */
/************************************************************************/

static XMLTokenType ReadToken(ParseContext *psContext, CPLErr &eLastErrorType)

{
    psContext->nTokenSize = 0;
    psContext->pszToken[0] = '\0';

    char chNext = ReadChar(psContext);
    while (isspace(static_cast<unsigned char>(chNext)))
        chNext = ReadChar(psContext);

    /* -------------------------------------------------------------------- */
    /*      Handle comments.                                                */
    /* -------------------------------------------------------------------- */
    if (chNext == '<' &&
        STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset, "!--"))
    {
        psContext->eTokenType = TComment;

        // Skip "!--" characters.
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);

        while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
                               "-->") &&
               (chNext = ReadChar(psContext)) != '\0')
            AddToToken(psContext, chNext);

        // Skip "-->" characters.
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
    }
    /* -------------------------------------------------------------------- */
    /*      Handle DOCTYPE.                                                 */
    /* -------------------------------------------------------------------- */
    else if (chNext == '<' &&
             STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
                            "!DOCTYPE"))
    {
        bool bInQuotes = false;
        psContext->eTokenType = TLiteral;

        AddToToken(psContext, '<');
        do
        {
            chNext = ReadChar(psContext);
            if (chNext == '\0')
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Parse error in DOCTYPE on or before line %d, "
                         "reached end of file without '>'.",
                         psContext->nInputLine);

                break;
            }

            /* The markup declaration block within a DOCTYPE tag consists of:
             * - a left square bracket [
             * - a list of declarations
             * - a right square bracket ]
             * Example:
             * <!DOCTYPE RootElement [ ...declarations... ]>
             */
            if (chNext == '[')
            {
                AddToToken(psContext, chNext);

                do
                {
                    chNext = ReadChar(psContext);
                    if (chNext == ']')
                        break;
                    AddToToken(psContext, chNext);
                } while (chNext != '\0' &&
                         !STARTS_WITH_CI(psContext->pszInput +
                                             psContext->nInputOffset,
                                         "]>"));

                if (chNext == '\0')
                {
                    eLastErrorType = CE_Failure;
                    CPLError(eLastErrorType, CPLE_AppDefined,
                             "Parse error in DOCTYPE on or before line %d, "
                             "reached end of file without ']'.",
                             psContext->nInputLine);
                    break;
                }

                if (chNext != ']')
                {
                    chNext = ReadChar(psContext);
                    AddToToken(psContext, chNext);

                    // Skip ">" character, will be consumed below.
                    chNext = ReadChar(psContext);
                }
            }

            if (chNext == '\"')
                bInQuotes = !bInQuotes;

            if (chNext == '>' && !bInQuotes)
            {
                AddToToken(psContext, '>');
                break;
            }

            AddToToken(psContext, chNext);
        } while (true);
    }
    /* -------------------------------------------------------------------- */
    /*      Handle CDATA.                                                   */
    /* -------------------------------------------------------------------- */
    else if (chNext == '<' &&
             STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
                            "![CDATA["))
    {
        psContext->eTokenType = TString;

        // Skip !CDATA[
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);

        while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
                               "]]>") &&
               (chNext = ReadChar(psContext)) != '\0')
            AddToToken(psContext, chNext);

        // Skip "]]>" characters.
        ReadChar(psContext);
        ReadChar(psContext);
        ReadChar(psContext);
    }
    /* -------------------------------------------------------------------- */
    /*      Simple single tokens of interest.                               */
    /* -------------------------------------------------------------------- */
    else if (chNext == '<' && !psContext->bInElement)
    {
        psContext->eTokenType = TOpen;
        psContext->bInElement = true;
    }
    else if (chNext == '>' && psContext->bInElement)
    {
        psContext->eTokenType = TClose;
        psContext->bInElement = false;
    }
    else if (chNext == '=' && psContext->bInElement)
    {
        psContext->eTokenType = TEqual;
    }
    else if (chNext == '\0')
    {
        psContext->eTokenType = TNone;
    }
    /* -------------------------------------------------------------------- */
    /*      Handle the /> token terminator.                                 */
    /* -------------------------------------------------------------------- */
    else if (chNext == '/' && psContext->bInElement &&
             psContext->pszInput[psContext->nInputOffset] == '>')
    {
        chNext = ReadChar(psContext);
        (void)chNext;
        CPLAssert(chNext == '>');

        psContext->eTokenType = TSlashClose;
        psContext->bInElement = false;
    }
    /* -------------------------------------------------------------------- */
    /*      Handle the ?> token terminator.                                 */
    /* -------------------------------------------------------------------- */
    else if (chNext == '?' && psContext->bInElement &&
             psContext->pszInput[psContext->nInputOffset] == '>')
    {
        chNext = ReadChar(psContext);
        (void)chNext;
        CPLAssert(chNext == '>');

        psContext->eTokenType = TQuestionClose;
        psContext->bInElement = false;
    }
    /* -------------------------------------------------------------------- */
    /*      Collect a quoted string.                                        */
    /* -------------------------------------------------------------------- */
    else if (psContext->bInElement && chNext == '"')
    {
        psContext->eTokenType = TString;

        while ((chNext = ReadChar(psContext)) != '"' && chNext != '\0')
            AddToToken(psContext, chNext);

        if (chNext != '"')
        {
            psContext->eTokenType = TNone;
            eLastErrorType = CE_Failure;
            CPLError(
                eLastErrorType, CPLE_AppDefined,
                "Parse error on line %d, reached EOF before closing quote.",
                psContext->nInputLine);
        }

        // Do we need to unescape it?
        if (strchr(psContext->pszToken, '&') != nullptr)
        {
            int nLength = 0;
            char *pszUnescaped =
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
            strcpy(psContext->pszToken, pszUnescaped);
            CPLFree(pszUnescaped);
            psContext->nTokenSize = strlen(psContext->pszToken);
        }
    }
    else if (psContext->bInElement && chNext == '\'')
    {
        psContext->eTokenType = TString;

        while ((chNext = ReadChar(psContext)) != '\'' && chNext != '\0')
            AddToToken(psContext, chNext);

        if (chNext != '\'')
        {
            psContext->eTokenType = TNone;
            eLastErrorType = CE_Failure;
            CPLError(
                eLastErrorType, CPLE_AppDefined,
                "Parse error on line %d, reached EOF before closing quote.",
                psContext->nInputLine);
        }

        // Do we need to unescape it?
        if (strchr(psContext->pszToken, '&') != nullptr)
        {
            int nLength = 0;
            char *pszUnescaped =
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
            strcpy(psContext->pszToken, pszUnescaped);
            CPLFree(pszUnescaped);
            psContext->nTokenSize = strlen(psContext->pszToken);
        }
    }
    /* -------------------------------------------------------------------- */
    /*      Collect an unquoted string, terminated by a open angle          */
    /*      bracket.                                                        */
    /* -------------------------------------------------------------------- */
    else if (!psContext->bInElement)
    {
        psContext->eTokenType = TString;

        AddToToken(psContext, chNext);
        while ((chNext = ReadChar(psContext)) != '<' && chNext != '\0')
            AddToToken(psContext, chNext);
        UnreadChar(psContext, chNext);

        // Do we need to unescape it?
        if (strchr(psContext->pszToken, '&') != nullptr)
        {
            int nLength = 0;
            char *pszUnescaped =
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
            strcpy(psContext->pszToken, pszUnescaped);
            CPLFree(pszUnescaped);
            psContext->nTokenSize = strlen(psContext->pszToken);
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Collect a regular token terminated by white space, or           */
    /*      special character(s) like an equal sign.                        */
    /* -------------------------------------------------------------------- */
    else
    {
        psContext->eTokenType = TToken;

        // Add the first character to the token regardless of what it is.
        AddToToken(psContext, chNext);

        for (chNext = ReadChar(psContext);
             (chNext >= 'A' && chNext <= 'Z') ||
             (chNext >= 'a' && chNext <= 'z') || chNext == '-' ||
             chNext == '_' || chNext == '.' || chNext == ':' ||
             (chNext >= '0' && chNext <= '9');
             chNext = ReadChar(psContext))
        {
            AddToToken(psContext, chNext);
        }

        UnreadChar(psContext, chNext);
    }

    return psContext->eTokenType;

fail:
    psContext->eTokenType = TNone;
    return TNone;
}

/************************************************************************/
/*                              PushNode()                              */
/************************************************************************/

/**
 * Push an element onto the stack of open elements.
 *
 * The stack grows by 10 entries whenever it is full, until its capacity
 * has reached 10000 entries.
 *
 * @param psContext parsing state owning the stack.
 * @param psNode element to push; it starts with no last child recorded.
 * @param eLastErrorType set to CE_Failure when the push fails.
 *
 * @return true on success.  On failure an error is reported, the stack
 * array is freed, papsStack is set to nullptr and false is returned.
 */
static bool PushNode(ParseContext *psContext, CPLXMLNode *psNode,
                     CPLErr &eLastErrorType)
{
    const bool bStackFull = psContext->nStackSize >= psContext->nStackMaxSize;
    if (bStackFull)
    {
        bool bEnlarged = false;

        // Somewhat arbitrary number.
        if (psContext->nStackMaxSize >= 10000)
        {
            eLastErrorType = CE_Failure;
            CPLError(CE_Failure, CPLE_NotSupported,
                     "XML element depth beyond 10000. Giving up");
        }
        else
        {
            psContext->nStackMaxSize += 10;

            StackContext *pasEnlarged = static_cast<StackContext *>(
                VSIRealloc(psContext->papsStack,
                           sizeof(StackContext) * psContext->nStackMaxSize));
            if (pasEnlarged != nullptr)
            {
                psContext->papsStack = pasEnlarged;
                bEnlarged = true;
            }
            else
            {
                eLastErrorType = CE_Failure;
                CPLError(CE_Failure, CPLE_OutOfMemory,
                         "Out of memory allocating %d bytes",
                         static_cast<int>(sizeof(StackContext)) *
                             psContext->nStackMaxSize);
            }
        }

        // Common failure exit: drop the stack altogether.
        if (!bEnlarged)
        {
            VSIFree(psContext->papsStack);
            psContext->papsStack = nullptr;
            return false;
        }
    }
#ifdef DEBUG
    // To make Coverity happy, but cannot happen.
    if (psContext->papsStack == nullptr)
        return false;
#endif

    StackContext &sNewTop = psContext->papsStack[psContext->nStackSize];
    sNewTop.psFirstNode = psNode;
    sNewTop.psLastChild = nullptr;
    psContext->nStackSize++;

    return true;
}

/************************************************************************/
/*                             AttachNode()                             */
/*                                                                      */
/*      Attach the passed node as a child of the current node.          */
/*      Special handling exists for adding siblings to psFirst if       */
/*      there is nothing on the stack.                                  */
/************************************************************************/

/**
 * Link a freshly created node into the tree.
 *
 * - First node ever seen: it becomes both first and last top-level node.
 * - Empty stack: the node is appended as a sibling of the last top-level
 *   node.
 * - Otherwise: the node is appended to the children of the element on top
 *   of the stack.
 */
static void AttachNode(ParseContext *psContext, CPLXMLNode *psNode)
{
    if (psContext->psFirstNode == nullptr)
    {
        psContext->psFirstNode = psNode;
        psContext->psLastNode = psNode;
        return;
    }

    if (psContext->nStackSize == 0)
    {
        psContext->psLastNode->psNext = psNode;
        psContext->psLastNode = psNode;
        return;
    }

    StackContext &sTop = psContext->papsStack[psContext->nStackSize - 1];
    CPLXMLNode *psParent = sTop.psFirstNode;

    // First child hangs off the parent, later ones off the previous child.
    if (psParent->psChild == nullptr)
        psParent->psChild = psNode;
    else
        sTop.psLastChild->psNext = psNode;

    sTop.psLastChild = psNode;
}

/************************************************************************/
/*                         CPLParseXMLString()                          */
/************************************************************************/

/**
 * \brief Parse an XML string into tree form.
 *
 * The passed document is parsed into a CPLXMLNode tree representation.
 * If the document is not well formed XML then NULL is returned, and errors
 * are reported via CPLError().  No validation beyond wellformedness is
 * done.  The CPLParseXMLFile() convenience function can be used to parse
 * from a file.
 *
 * The returned document tree is owned by the caller and should be freed
 * with CPLDestroyXMLNode() when no longer needed.
 *
 * If the document has more than one "root level" element then those after the
 * first will be attached to the first as siblings (via the psNext pointers)
 * even though there is no common parent.  A document with no XML structure
 * (no angle brackets for instance) would be considered well formed, and
 * returned as a single CXT_Text node.
 *
 * @param pszString the document to parse.
 *
 * @return parsed tree or NULL on error.
 */

CPLXMLNode *CPLParseXMLString(const char *pszString)

{
    if (pszString == nullptr)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "CPLParseXMLString() called with NULL pointer.");
        return nullptr;
    }

    // Save back error context.
    const CPLErr eErrClass = CPLGetLastErrorType();
    const CPLErrorNum nErrNum = CPLGetLastErrorNo();
    const CPLString osErrMsg = CPLGetLastErrorMsg();

    // Reset it now.
    CPLErrorSetState(CE_None, CPLE_AppDefined, "");

    /* -------------------------------------------------------------------- */
    /*      Check for a UTF-8 BOM and skip if found                         */
    /*                                                                      */
    /*      TODO: BOM is variable-length parameter and depends on encoding. */
    /*            Add BOM detection for other encodings.                    */
    /* -------------------------------------------------------------------- */

    // Used to skip to actual beginning of XML data.
    if ((static_cast<unsigned char>(pszString[0]) == 0xEF) &&
        (static_cast<unsigned char>(pszString[1]) == 0xBB) &&
        (static_cast<unsigned char>(pszString[2]) == 0xBF))
    {
        pszString += 3;
    }

    /* -------------------------------------------------------------------- */
    /*      Initialize parse context.                                       */
    /* -------------------------------------------------------------------- */
    ParseContext sContext;
    sContext.pszInput = pszString;
    sContext.nInputOffset = 0;
    sContext.nInputLine = 0;
    sContext.bInElement = false;
    sContext.nTokenMaxSize = 10;
    sContext.pszToken = static_cast<char *>(VSIMalloc(sContext.nTokenMaxSize));
    if (sContext.pszToken == nullptr)
        return nullptr;
    sContext.nTokenSize = 0;
    sContext.eTokenType = TNone;
    sContext.nStackMaxSize = 0;
    sContext.nStackSize = 0;
    sContext.papsStack = nullptr;
    sContext.psFirstNode = nullptr;
    sContext.psLastNode = nullptr;

#ifdef DEBUG
    bool bRecoverableError = true;
#endif
    CPLErr eLastErrorType = CE_None;

    /* ==================================================================== */
    /*      Loop reading tokens.                                            */
    /* ==================================================================== */
    while (ReadToken(&sContext, eLastErrorType) != TNone)
    {
    loop_beginning:
        /* --------------------------------------------------------------------
         */
        /*      Create a new element. */
        /* --------------------------------------------------------------------
         */
        if (sContext.eTokenType == TOpen)
        {
            if (ReadToken(&sContext, eLastErrorType) != TToken)
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Didn't find element token after "
                         "open angle bracket.",
                         sContext.nInputLine);
                break;
            }

            CPLXMLNode *psElement = nullptr;
            if (sContext.pszToken[0] != '/')
            {
                psElement =
                    _CPLCreateXMLNode(nullptr, CXT_Element, sContext.pszToken);
                if (!psElement)
                    break;
                AttachNode(&sContext, psElement);
                if (!PushNode(&sContext, psElement, eLastErrorType))
                    break;
            }
            else
            {
                if (sContext.nStackSize == 0 ||
                    !EQUAL(sContext.pszToken + 1,
                           sContext.papsStack[sContext.nStackSize - 1]
                               .psFirstNode->pszValue))
                {
#ifdef DEBUG
                    // Makes life of fuzzers easier if we accept somewhat
                    // corrupted XML like <foo> ... </not_foo>.
                    if (CPLTestBool(
                            CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
                    {
                        eLastErrorType = CE_Warning;
                        CPLError(
                            eLastErrorType, CPLE_AppDefined,
                            "Line %d: <%.500s> doesn't have matching <%.500s>.",
                            sContext.nInputLine, sContext.pszToken,
                            sContext.pszToken + 1);
                        if (sContext.nStackSize == 0)
                            break;
                        goto end_processing_close;
                    }
                    else
#endif
                    {
                        eLastErrorType = CE_Failure;
                        CPLError(
                            eLastErrorType, CPLE_AppDefined,
                            "Line %d: <%.500s> doesn't have matching <%.500s>.",
                            sContext.nInputLine, sContext.pszToken,
                            sContext.pszToken + 1);
                        break;
                    }
                }
                else
                {
                    if (strcmp(sContext.pszToken + 1,
                               sContext.papsStack[sContext.nStackSize - 1]
                                   .psFirstNode->pszValue) != 0)
                    {
                        // TODO: At some point we could just error out like any
                        // other sane XML parser would do.
                        eLastErrorType = CE_Warning;
                        CPLError(
                            eLastErrorType, CPLE_AppDefined,
                            "Line %d: <%.500s> matches <%.500s>, but the case "
                            "isn't the same.  Going on, but this is invalid "
                            "XML that might be rejected in future versions.",
                            sContext.nInputLine,
                            sContext.papsStack[sContext.nStackSize - 1]
                                .psFirstNode->pszValue,
                            sContext.pszToken);
                    }
#ifdef DEBUG
                end_processing_close:
#endif
                    if (ReadToken(&sContext, eLastErrorType) != TClose)
                    {
                        eLastErrorType = CE_Failure;
                        CPLError(eLastErrorType, CPLE_AppDefined,
                                 "Line %d: Missing close angle bracket "
                                 "after <%.500s.",
                                 sContext.nInputLine, sContext.pszToken);
                        break;
                    }

                    // Pop element off stack
                    sContext.nStackSize--;
                }
            }
        }

        /* --------------------------------------------------------------------
         */
        /*      Add an attribute to a token. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TToken)
        {
            CPLXMLNode *psAttr =
                _CPLCreateXMLNode(nullptr, CXT_Attribute, sContext.pszToken);
            if (!psAttr)
                break;
            AttachNode(&sContext, psAttr);

            XMLTokenType nextToken = ReadToken(&sContext, eLastErrorType);
            if (nextToken != TEqual)
            {
                // Parse stuff like <?valbuddy_schematron
                // ../wmtsSimpleGetCapabilities.sch?>
                if (sContext.nStackSize > 0 &&
                    sContext.papsStack[sContext.nStackSize - 1]
                            .psFirstNode->pszValue[0] == '?')
                {
                    psAttr->eType = CXT_Text;
                    if (nextToken == TNone)
                        break;
                    goto loop_beginning;
                }

                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Didn't find expected '=' for value of "
                         "attribute '%.500s'.",
                         sContext.nInputLine, psAttr->pszValue);
#ifdef DEBUG
                // Accepting an attribute without child text
                // would break too much assumptions in driver code
                bRecoverableError = false;
#endif
                break;
            }

            if (ReadToken(&sContext, eLastErrorType) == TToken)
            {
                /* TODO: at some point we could just error out like any other */
                /* sane XML parser would do */
                eLastErrorType = CE_Warning;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Attribute value should be single or double "
                         "quoted.  Going on, but this is invalid XML that "
                         "might be rejected in future versions.",
                         sContext.nInputLine);
            }
            else if (sContext.eTokenType != TString)
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Didn't find expected attribute value.",
                         sContext.nInputLine);
#ifdef DEBUG
                // Accepting an attribute without child text
                // would break too much assumptions in driver code
                bRecoverableError = false;
#endif
                break;
            }

            if (!_CPLCreateXMLNode(psAttr, CXT_Text, sContext.pszToken))
                break;
        }

        /* --------------------------------------------------------------------
         */
        /*      Close the start section of an element. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TClose)
        {
            if (sContext.nStackSize == 0)
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Found unbalanced '>'.", sContext.nInputLine);
                break;
            }
        }

        /* --------------------------------------------------------------------
         */
        /*      Close the start section of an element, and pop it */
        /*      immediately. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TSlashClose)
        {
            if (sContext.nStackSize == 0)
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Found unbalanced '/>'.",
                         sContext.nInputLine);
                break;
            }

            sContext.nStackSize--;
        }
        /* --------------------------------------------------------------------
         */
        /*      Close the start section of a <?...?> element, and pop it */
        /*      immediately. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TQuestionClose)
        {
            if (sContext.nStackSize == 0)
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Found unbalanced '?>'.",
                         sContext.nInputLine);
                break;
            }
            else if (sContext.papsStack[sContext.nStackSize - 1]
                         .psFirstNode->pszValue[0] != '?')
            {
                eLastErrorType = CE_Failure;
                CPLError(eLastErrorType, CPLE_AppDefined,
                         "Line %d: Found '?>' without matching '<?'.",
                         sContext.nInputLine);
                break;
            }

            sContext.nStackSize--;
        }
        /* --------------------------------------------------------------------
         */
        /*      Handle comments.  They are returned as a whole token with the */
        /*      prefix and postfix omitted.  No processing of white space */
        /*      will be done. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TComment)
        {
            CPLXMLNode *psValue =
                _CPLCreateXMLNode(nullptr, CXT_Comment, sContext.pszToken);
            if (!psValue)
                break;
            AttachNode(&sContext, psValue);
        }
        /* --------------------------------------------------------------------
         */
        /*      Handle literals.  They are returned without processing. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TLiteral)
        {
            CPLXMLNode *psValue =
                _CPLCreateXMLNode(nullptr, CXT_Literal, sContext.pszToken);
            if (!psValue)
                break;
            AttachNode(&sContext, psValue);
        }
        /* --------------------------------------------------------------------
         */
        /*      Add a text value node as a child of the current element. */
        /* --------------------------------------------------------------------
         */
        else if (sContext.eTokenType == TString && !sContext.bInElement)
        {
            CPLXMLNode *psValue =
                _CPLCreateXMLNode(nullptr, CXT_Text, sContext.pszToken);
            if (!psValue)
                break;
            AttachNode(&sContext, psValue);
        }
        /* --------------------------------------------------------------------
         */
        /*      Anything else is an error. */
        /* --------------------------------------------------------------------
         */
        else
        {
            eLastErrorType = CE_Failure;
            CPLError(eLastErrorType, CPLE_AppDefined,
                     "Parse error at line %d, unexpected token:%.500s",
                     sContext.nInputLine, sContext.pszToken);
            break;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Did we pop all the way out of our stack?                        */
    /* -------------------------------------------------------------------- */
    if (CPLGetLastErrorType() != CE_Failure && sContext.nStackSize > 0 &&
        sContext.papsStack != nullptr)
    {
#ifdef DEBUG
        // Makes life of fuzzers easier if we accept somewhat corrupted XML
        // like <x> ...
        if (bRecoverableError &&
            CPLTestBool(CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
        {
            eLastErrorType = CE_Warning;
        }
        else
#endif
        {
            eLastErrorType = CE_Failure;
        }
        CPLError(
            eLastErrorType, CPLE_AppDefined,
            "Parse error at EOF, not all elements have been closed, "
            "starting with %.500s",
            sContext.papsStack[sContext.nStackSize - 1].psFirstNode->pszValue);
    }

    /* -------------------------------------------------------------------- */
    /*      Cleanup                                                         */
    /* -------------------------------------------------------------------- */
    CPLFree(sContext.pszToken);
    if (sContext.papsStack != nullptr)
        CPLFree(sContext.papsStack);

    // We do not trust CPLGetLastErrorType() as if CPLTurnFailureIntoWarning()
    // has been set we would never get failures
    if (eLastErrorType == CE_Failure)
    {
        CPLDestroyXMLNode(sContext.psFirstNode);
        sContext.psFirstNode = nullptr;
        sContext.psLastNode = nullptr;
    }

    if (eLastErrorType == CE_None)
    {
        // Restore initial error state.
        CPLErrorSetState(eErrClass, nErrNum, osErrMsg);
    }

    return sContext.psFirstNode;
}

/************************************************************************/
/*                            _GrowBuffer()                             */
/************************************************************************/

/* Make sure the buffer *ppszText has room for nNeeded characters plus a
 * terminating NUL, reallocating it when the current capacity is too small.
 *
 * nNeeded      number of characters (excluding the NUL) the caller wants
 *              to be able to store in the buffer.
 * ppszText     in/out: the buffer; replaced by the reallocated pointer
 *              when growth succeeds.
 * pnMaxLength  in/out: current capacity of *ppszText in bytes.
 *
 * Returns true when the buffer is large enough on exit, false when the
 * reallocation failed.  On failure neither *ppszText nor *pnMaxLength is
 * modified, so the recorded capacity always matches the buffer the caller
 * still holds.
 */
static bool _GrowBuffer(size_t nNeeded, char **ppszText, size_t *pnMaxLength)

{
    if (nNeeded + 1 >= *pnMaxLength)
    {
        // Grow at least geometrically so that repeated small appends do not
        // trigger one reallocation each.
        const size_t nNewMaxLength = std::max(*pnMaxLength * 2, nNeeded + 1);
        char *pszTextNew =
            static_cast<char *>(VSIRealloc(*ppszText, nNewMaxLength));
        if (pszTextNew == nullptr)
            return false;

        // Commit the new pointer and capacity together, only on success.
        *ppszText = pszTextNew;
        *pnMaxLength = nNewMaxLength;
    }
    return true;
}

/************************************************************************/
/*                        CPLSerializeXMLNode()                         */
/************************************************************************/

// TODO(schwehr): Rewrite this whole thing using C++ string.
// CPLSerializeXMLNode has buffer overflows.
/* Append the textual form of psNode (and, for elements, of its children)
 * to the growable NUL-terminated buffer *ppszText.
 *
 * psNode       node to serialize; NULL is accepted and is a no-op.
 * nIndent      number of spaces written before comment, literal and
 *              element nodes (text and attribute nodes ignore it).
 * ppszText     in/out: output buffer, reallocated through _GrowBuffer().
 * pnLength     in/out: an offset at or before the terminating NUL of
 *              *ppszText.  It is re-synchronized with strlen() on entry and
 *              is NOT guaranteed to point at the NUL on return.
 * pnMaxLength  in/out: capacity of *ppszText in bytes.
 *
 * Returns false as soon as a buffer growth fails, true otherwise.
 */
static bool CPLSerializeXMLNode(const CPLXMLNode *psNode, int nIndent,
                                char **ppszText, size_t *pnLength,
                                size_t *pnMaxLength)

{
    if (psNode == nullptr)
        return true;

    /* -------------------------------------------------------------------- */
    /*      Ensure the buffer is plenty large to hold this additional       */
    /*      string.                                                         */
    /* -------------------------------------------------------------------- */
    // Advance *pnLength to the actual end of the text before sizing: the
    // 40 extra bytes cover the fixed markup emitted by the branches below.
    *pnLength += strlen(*ppszText + *pnLength);
    if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
                     ppszText, pnMaxLength))
        return false;

    /* -------------------------------------------------------------------- */
    /*      Text is just directly emitted.                                  */
    /* -------------------------------------------------------------------- */
    if (psNode->eType == CXT_Text)
    {
        char *pszEscaped =
            CPLEscapeString(psNode->pszValue, -1, CPLES_XML_BUT_QUOTES);

        CPLAssert(psNode->psChild == nullptr);

        // Escaped text might be bigger than expected.
        if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
        {
            CPLFree(pszEscaped);
            return false;
        }
        strcat(*ppszText + *pnLength, pszEscaped);

        CPLFree(pszEscaped);
    }

    /* -------------------------------------------------------------------- */
    /*      Attributes require a little formatting.                         */
    /* -------------------------------------------------------------------- */
    else if (psNode->eType == CXT_Attribute)
    {
        CPLAssert(psNode->psChild != nullptr &&
                  psNode->psChild->eType == CXT_Text);

        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, " %s=\"",
                 psNode->pszValue);
        *pnLength += strlen(*ppszText + *pnLength);

        char *pszEscaped =
            CPLEscapeString(psNode->psChild->pszValue, -1, CPLES_XML);

        if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
        {
            CPLFree(pszEscaped);
            return false;
        }
        strcat(*ppszText + *pnLength, pszEscaped);

        CPLFree(pszEscaped);

        // Account for the escaped value, then make room for the closing
        // quote.
        *pnLength += strlen(*ppszText + *pnLength);
        if (!_GrowBuffer(3 + *pnLength, ppszText, pnMaxLength))
            return false;
        strcat(*ppszText + *pnLength, "\"");
    }

    /* -------------------------------------------------------------------- */
    /*      Handle comment output.                                          */
    /* -------------------------------------------------------------------- */
    else if (psNode->eType == CXT_Comment)
    {
        CPLAssert(psNode->psChild == nullptr);

        // The indentation is not NUL-terminated here; snprintf() below
        // terminates the buffer again.
        for (int i = 0; i < nIndent; i++)
            (*ppszText)[(*pnLength)++] = ' ';

        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<!--%s-->\n",
                 psNode->pszValue);
    }

    /* -------------------------------------------------------------------- */
    /*      Handle literal output (like <!DOCTYPE...>)                      */
    /* -------------------------------------------------------------------- */
    else if (psNode->eType == CXT_Literal)
    {
        CPLAssert(psNode->psChild == nullptr);

        for (int i = 0; i < nIndent; i++)
            (*ppszText)[(*pnLength)++] = ' ';

        strcpy(*ppszText + *pnLength, psNode->pszValue);
        strcat(*ppszText + *pnLength, "\n");
    }

    /* -------------------------------------------------------------------- */
    /*      Elements actually have to deal with general children, and       */
    /*      various formatting issues.                                      */
    /* -------------------------------------------------------------------- */
    else if (psNode->eType == CXT_Element)
    {
        if (nIndent)
            memset(*ppszText + *pnLength, ' ', nIndent);
        *pnLength += nIndent;
        (*ppszText)[*pnLength] = '\0';

        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<%s",
                 psNode->pszValue);

        // Elements whose name starts with '?' are emitted as <?name ...?>
        // with all their children inline.
        if (psNode->pszValue[0] == '?')
        {
            for (const CPLXMLNode *psChild = psNode->psChild;
                 psChild != nullptr; psChild = psChild->psNext)
            {
                if (psChild->eType == CXT_Text)
                {
                    *pnLength += strlen(*ppszText + *pnLength);
                    if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
                        return false;
                    strcat(*ppszText + *pnLength, " ");
                }

                if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
                                         pnMaxLength))
                {
                    return false;
                }
            }

            // The child serializers may leave *pnLength pointing before the
            // text they appended (e.g. a text child whose escaped form is
            // much longer than its raw value).  Re-synchronize it with the
            // real end of the buffer so that the growth request below is
            // computed from the actual length; otherwise the strcat() could
            // write past the allocation.
            *pnLength += strlen(*ppszText + *pnLength);
            if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
                return false;

            strcat(*ppszText + *pnLength, "?>\n");
        }
        else
        {
            bool bHasNonAttributeChildren = false;
            // Serialize *all* the attribute children, regardless of order
            for (const CPLXMLNode *psChild = psNode->psChild;
                 psChild != nullptr; psChild = psChild->psNext)
            {
                if (psChild->eType == CXT_Attribute)
                {
                    if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
                                             pnMaxLength))
                        return false;
                }
                else
                    bHasNonAttributeChildren = true;
            }

            if (!bHasNonAttributeChildren)
            {
                // No content: emit a self-closing tag.
                if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
                    return false;

                strcat(*ppszText + *pnLength, " />\n");
            }
            else
            {
                // Stays true while only text children have been seen; in
                // that case the element is written on a single line.
                bool bJustText = true;

                strcat(*ppszText + *pnLength, ">");

                for (const CPLXMLNode *psChild = psNode->psChild;
                     psChild != nullptr; psChild = psChild->psNext)
                {
                    if (psChild->eType == CXT_Attribute)
                        continue;

                    // First non-text child: break the line after the
                    // opening tag so children go on their own lines.
                    if (psChild->eType != CXT_Text && bJustText)
                    {
                        bJustText = false;
                        *pnLength += strlen(*ppszText + *pnLength);
                        if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
                            return false;
                        strcat(*ppszText + *pnLength, "\n");
                    }

                    if (!CPLSerializeXMLNode(psChild, nIndent + 2, ppszText,
                                             pnLength, pnMaxLength))
                        return false;
                }

                *pnLength += strlen(*ppszText + *pnLength);
                if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 +
                                     nIndent,
                                 ppszText, pnMaxLength))
                    return false;

                // Only indent the closing tag when it sits on its own line.
                if (!bJustText)
                {
                    if (nIndent)
                        memset(*ppszText + *pnLength, ' ', nIndent);
                    *pnLength += nIndent;
                    (*ppszText)[*pnLength] = '\0';
                }

                *pnLength += strlen(*ppszText + *pnLength);
                snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength,
                         "</%s>\n", psNode->pszValue);
            }
        }
    }

    return true;
}

/************************************************************************/
/*                        CPLSerializeXMLTree()                         */
/************************************************************************/

/**
 * \brief Convert tree into string document.
 *
 * This function converts a CPLXMLNode tree representation of a document
 * into a flat string representation.  White space indentation is used to
 * visually preserve the tree structure of the document.  The returned
 * document becomes owned by the caller and should be freed with CPLFree()
 * when no longer needed.
 *
 * @param psNode the node to serialize.
 *
 * @return the document on success or NULL on failure.
 */

char *CPLSerializeXMLTree(const CPLXMLNode *psNode)

{
    size_t nMaxLength = 100;
    char *pszText = static_cast<char *>(CPLCalloc(nMaxLength, sizeof(char)));
    if (pszText == nullptr)
        return nullptr;

    size_t nLength = 0;
    for (const CPLXMLNode *psThis = psNode; psThis != nullptr;
         psThis = psThis->psNext)
    {
        if (!CPLSerializeXMLNode(psThis, 0, &pszText, &nLength, &nMaxLength))
        {
            VSIFree(pszText);
            return nullptr;
        }
    }

    return pszText;
}

/************************************************************************/
/*                          CPLCreateXMLNode()                          */
/************************************************************************/

#ifdef DEBUG
// Debug-only sink: _CPLCreateXMLNode() stores here the node it just created
// when no parent was supplied, so that static analysis sees a reference to
// it.  The value is never read in this part of the file.
static CPLXMLNode *psDummyStaticNode;
#endif

/**
 * \brief Create a document tree item.
 *
 * Create a single CPLXMLNode object with the desired value and type, and
 * attach it as a child of the indicated parent.
 *
 * @param poParent the parent to which this node should be attached as a
 * child.  May be NULL to keep as free standing.
 * @param eType the type of the newly created node
 * @param pszText the value of the newly created node
 *
 * @return the newly created node, now owned by the caller (or parent node).
 */

CPLXMLNode *CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                             const char *pszText)

{
    // Delegate to the variant that may return NULL, and report an
    // allocation failure with CE_Fatal severity.
    CPLXMLNode *const psCreated = _CPLCreateXMLNode(poParent, eType, pszText);
    if (psCreated != nullptr)
        return psCreated;

    CPLError(CE_Fatal, CPLE_OutOfMemory, "CPLCreateXMLNode() failed");
    return psCreated;
}

/************************************************************************/
/*                         _CPLCreateXMLNode()                          */
/************************************************************************/

/* Same as CPLCreateXMLNode() but can return NULL in case of out-of-memory */
/* situation */

static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                                     const char *pszText)

{
    // Allocate the zero-filled node itself.
    CPLXMLNode *psNew =
        static_cast<CPLXMLNode *>(VSICalloc(sizeof(CPLXMLNode), 1));
    if (psNew == nullptr)
    {
        CPLError(CE_Failure, CPLE_OutOfMemory, "Cannot allocate CPLXMLNode");
        return nullptr;
    }

    // Copy the value; a NULL text is stored as an empty string.
    const char *pszSource = (pszText != nullptr) ? pszText : "";
    psNew->eType = eType;
    psNew->pszValue = VSIStrdup(pszSource);
    if (psNew->pszValue == nullptr)
    {
        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Cannot allocate psNode->pszValue");
        VSIFree(psNew);
        return nullptr;
    }

    // Free-standing node: nothing to link.
    if (poParent == nullptr)
    {
#ifdef DEBUG
        // Coverity sometimes assumes this path is taken and that the caller
        // leaks the node; keeping a static reference is a workaround meant
        // to make it believe the node is still referenced.
        psDummyStaticNode = psNew;
#endif
        return psNew;
    }

    // First child of the parent.
    CPLXMLNode *psFirst = poParent->psChild;
    if (psFirst == nullptr)
    {
        poParent->psChild = psNew;
        return psNew;
    }

    const bool bIsAttribute = (eType == CXT_Attribute);

    // Parent has a single text child: a new attribute goes in front of it.
    if (bIsAttribute && psFirst->psNext == nullptr &&
        psFirst->eType == CXT_Text)
    {
        psNew->psNext = psFirst;
        poParent->psChild = psNew;
        return psNew;
    }

    // Otherwise walk to the end of the child list; an attribute stops early
    // and is spliced in just before the first text node found after the
    // head of the list.
    CPLXMLNode *psPrev = psFirst;
    for (; psPrev->psNext != nullptr; psPrev = psPrev->psNext)
    {
        if (bIsAttribute && psPrev->psNext->eType == CXT_Text)
        {
            psNew->psNext = psPrev->psNext;
            break;
        }
    }
    psPrev->psNext = psNew;

    return psNew;
}

/************************************************************************/
/*                         CPLDestroyXMLNode()                          */
/************************************************************************/

/**
 * \brief Destroy a tree.
 *
 * This function frees resources associated with a CPLXMLNode and all its
 * children nodes.
 *
 * @param psNode the tree to free.
 */

void CPLDestroyXMLNode(CPLXMLNode *psNode)

{
    // Iterative destruction: instead of recursing into children, each
    // node's child list is spliced into the sibling chain right after the
    // node, so a single forward walk frees everything.
    for (CPLXMLNode *psCurrent = psNode; psCurrent != nullptr;)
    {
        if (psCurrent->pszValue != nullptr)
            CPLFree(psCurrent->pszValue);

        CPLXMLNode *const psFirstChild = psCurrent->psChild;
        if (psFirstChild != nullptr)
        {
            CPLXMLNode *const psFormerNext = psCurrent->psNext;
            psCurrent->psNext = psFirstChild;

            // Re-attach the original following siblings after the last
            // child so they are still visited.
            if (psFormerNext != nullptr)
            {
                CPLXMLNode *psLastChild = psFirstChild;
                while (psLastChild->psNext != nullptr)
                    psLastChild = psLastChild->psNext;
                psLastChild->psNext = psFormerNext;
            }
        }

        // Read the successor before releasing the node.
        CPLXMLNode *const psFollowing = psCurrent->psNext;
        CPLFree(psCurrent);
        psCurrent = psFollowing;
    }
}

/************************************************************************/
/*                           CPLSearchXMLNode()                         */
/************************************************************************/

/**
 * \brief Search for a node in document.
 *
 * Searches the children (and potentially siblings) of the document
 * passed in for the named element or attribute.  To search following
 * siblings as well as children, prefix the pszElement name with an equal
 * sign.  This function does a depth-first, pre-order traversal of the
 * document tree: it will first match against the current node, then its
 * first child, that child's first child, and so on.
 *
 * Use CPLGetXMLNode() to find a specific child, or along a specific
 * node path.
 *
 * @param psRoot the subtree to search.  This should be a node of type
 * CXT_Element.  NULL is safe.
 *
 * @param pszElement the name of the element or attribute to search for.
 *
 * @return The matching node or NULL on failure.
 */

CPLXMLNode *CPLSearchXMLNode(CPLXMLNode *psRoot, const char *pszElement)

{
    if (psRoot == nullptr || pszElement == nullptr)
        return nullptr;

    // A leading '=' requests that following siblings be searched too; the
    // marker itself is not part of the name.
    const bool bAlsoSiblings = (*pszElement == '=');
    const char *const pszName = bAlsoSiblings ? pszElement + 1 : pszElement;

    // Only element and attribute nodes can match, compared with EQUAL().
    const auto IsNamedMatch = [pszName](const CPLXMLNode *psCandidate)
    {
        return (psCandidate->eType == CXT_Element ||
                psCandidate->eType == CXT_Attribute) &&
               EQUAL(pszName, psCandidate->pszValue);
    };

    // 1. The node itself.
    if (IsNamedMatch(psRoot))
        return psRoot;

    // 2. Its children, descending into each child that has children of
    //    its own before moving on to the next one.
    for (CPLXMLNode *psKid = psRoot->psChild; psKid != nullptr;
         psKid = psKid->psNext)
    {
        if (IsNamedMatch(psKid))
            return psKid;

        if (psKid->psChild == nullptr)
            continue;

        CPLXMLNode *const psFound = CPLSearchXMLNode(psKid, pszName);
        if (psFound != nullptr)
            return psFound;
    }

    // 3. Following siblings, only in side search mode.  The recursive call
    //    receives the name without '=' so each sibling is searched on its
    //    own.
    if (!bAlsoSiblings)
        return nullptr;

    for (CPLXMLNode *psSibling = psRoot->psNext; psSibling != nullptr;
         psSibling = psSibling->psNext)
    {
        CPLXMLNode *const psFound = CPLSearchXMLNode(psSibling, pszName);
        if (psFound != nullptr)
            return psFound;
    }

    return nullptr;
}

/************************************************************************/
/*                           CPLGetXMLNode()                            */
/************************************************************************/

/**
 * \brief Find node by path.
 *
 * Searches the document or subdocument indicated by psRoot for an element
 * (or attribute) with the given path.  The path should consist of a set of
 * element names separated by dots, not including the name of the root
 * element (psRoot).  If the requested element is not found NULL is returned.
 *
 * Attribute names may only appear as the last item in the path.
 *
 * The search is done from the root nodes children, but all intermediate
 * nodes in the path must be specified.  Searching for "name" would only find
 * a name element or attribute if it is a direct child of the root, not at any
 * level in the subdocument.
 *
 * If the pszPath is prefixed by "=" then the search will begin with the
 * root node, and its siblings, instead of the root nodes children.  This
 * is particularly useful when searching within a whole document which is
 * often prefixed by one or more "junk" nodes like the <?xml> declaration.
 *
 * @param psRoot the subtree in which to search.  This should be a node of
 * type CXT_Element.  NULL is safe.
 *
 * @param pszPath the list of element names in the path (dot separated).
 *
 * @return the requested element node, or NULL if not found.
 */

CPLXMLNode *CPLGetXMLNode(CPLXMLNode *psRoot, const char *pszPath)

{
    if (psRoot == nullptr || pszPath == nullptr)
        return nullptr;

    // A leading '=' means the first path component is looked up among
    // psRoot and its siblings rather than among psRoot's children.
    bool bStartAtRoot = (*pszPath == '=');
    if (bStartAtRoot)
        ++pszPath;

    // Single-component paths use this stack array; the tokenizer (which
    // allocates) is only called when the path contains a dot.
    const char *const apszSingleName[2] = {pszPath, nullptr};
    const bool bHasSeparator = (strchr(pszPath, '.') != nullptr);
    char **papszSplit = nullptr;
    const char *const *papszNames = apszSingleName;
    if (bHasSeparator)
    {
        papszSplit = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
        papszNames = papszSplit;
    }

    // Resolve one path component per iteration; psRoot becomes the matched
    // node, or NULL as soon as a component cannot be found.
    for (int iName = 0; psRoot != nullptr && papszNames[iName] != nullptr;
         ++iName)
    {
        CPLXMLNode *psCandidate = bStartAtRoot ? psRoot : psRoot->psChild;
        bStartAtRoot = false;

        // Skip text nodes and nodes whose value differs from the name.
        while (psCandidate != nullptr &&
               (psCandidate->eType == CXT_Text ||
                !EQUAL(papszNames[iName], psCandidate->pszValue)))
        {
            psCandidate = psCandidate->psNext;
        }

        psRoot = psCandidate;
    }

    if (bHasSeparator)
        CSLDestroy(papszSplit);
    return psRoot;
}

/************************************************************************/
/*                           CPLGetXMLValue()                           */
/************************************************************************/

/**
 * \brief Fetch element/attribute value.
 *
 * Searches the document for the element/attribute value associated with
 * the path.  The corresponding node is internally found with CPLGetXMLNode()
 * (see there for details on path handling).  Once found, the value is
 * considered to be the first CXT_Text child of the node.
 *
 * If the attribute/element search fails, or if the found node has no
 * value then the passed default value is returned.
 *
 * The returned value points to memory within the document tree, and should
 * not be altered or freed.
 *
 * @param psRoot the subtree in which to search.  This should be a node of
 * type CXT_Element.  NULL is safe.
 *
 * @param pszPath the list of element names in the path (dot separated).  An
 * empty path means get the value of the psRoot node.
 *
 * @param pszDefault the value to return if a corresponding value is not
 * found, may be NULL.
 *
 * @return the requested value or pszDefault if not found.
 */

const char *CPLGetXMLValue(const CPLXMLNode *psRoot, const char *pszPath,
                           const char *pszDefault)

{
    const CPLXMLNode *psTarget = nullptr;

    // A NULL or empty path designates psRoot itself.
    if (pszPath == nullptr || *pszPath == '\0')
        psTarget = psRoot;
    else
        psTarget = CPLGetXMLNode(psRoot, pszPath);

    if (psTarget == nullptr)
        return pszDefault;

    if (psTarget->eType == CXT_Attribute)
    {
        CPLAssert(psTarget->psChild != nullptr &&
                  psTarget->psChild->eType == CXT_Text);

        // An attribute node without any child has no value: fall back to
        // the default rather than dereferencing a NULL child.  The assert
        // above is presumably inactive in non-debug builds, so it cannot be
        // relied on to catch this.
        if (psTarget->psChild == nullptr)
            return pszDefault;

        return psTarget->psChild->pszValue;
    }

    if (psTarget->eType == CXT_Element)
    {
        // Find first non-attribute child, and verify it is a single text
        // with no siblings.

        psTarget = psTarget->psChild;

        while (psTarget != nullptr && psTarget->eType == CXT_Attribute)
            psTarget = psTarget->psNext;

        if (psTarget != nullptr && psTarget->eType == CXT_Text &&
            psTarget->psNext == nullptr)
            return psTarget->pszValue;
    }

    // Any other node type, or an element without a lone text child.
    return pszDefault;
}

/************************************************************************/
/*                           CPLAddXMLChild()                           */
/************************************************************************/

/**
 * \brief Add child node to parent.
 *
 * The passed child is added to the list of children of the indicated
 * parent.  Normally the child is added at the end of the parents child
 * list, but attributes (CXT_Attribute) will be inserted after any other
 * attributes but before any other element type.  Ownership of the child
 * node is effectively assumed by the parent node.   If the child has
 * siblings (its psNext is not NULL) they will be trimmed, but if the child
 * has children they are carried with it.
 *
 * @param psParent the node to attach the child to.  May not be NULL.
 *
 * @param psChild the child to add to the parent.  May not be NULL.  Should
 * not be a child of any other parent.
 */

void CPLAddXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)

{
    // Empty child list: the new node simply becomes the first child.
    CPLXMLNode *const psHead = psParent->psChild;
    if (psHead == nullptr)
    {
        psParent->psChild = psChild;
        return;
    }

    const bool bAddingAttribute = (psChild->eType == CXT_Attribute);

    // An attribute goes to the head when the current head is not one.
    if (bAddingAttribute && psHead->eType != CXT_Attribute)
    {
        psChild->psNext = psHead;
        psParent->psChild = psChild;
        return;
    }

    // Walk towards the tail.  An attribute is spliced in before the first
    // following node that is not an attribute; anything else (or an
    // attribute when no such node exists) is appended at the end.
    CPLXMLNode *psCursor = psHead;
    while (psCursor->psNext != nullptr)
    {
        if (bAddingAttribute && psCursor->psNext->eType != CXT_Attribute)
        {
            psChild->psNext = psCursor->psNext;
            psCursor->psNext = psChild;
            return;
        }
        psCursor = psCursor->psNext;
    }

    psCursor->psNext = psChild;
}

/************************************************************************/
/*                        CPLRemoveXMLChild()                           */
/************************************************************************/

/**
 * \brief Remove child node from parent.
 *
 * The passed child is removed from the child list of the passed parent,
 * but the child is not destroyed.  The child retains ownership of its
 * own children, but is cleanly removed from the child list of the parent.
 *
 * @param psParent the node the child is attached to.
 *
 * @param psChild the child to remove.
 *
 * @return TRUE on success or FALSE if the child was not found.
 */

int CPLRemoveXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)

{
    if (psParent == nullptr)
        return FALSE;

    // ppsLink always addresses the pointer that references the node being
    // examined (the parent's psChild first, then each predecessor's psNext),
    // so unlinking needs no special case for the head of the list.
    CPLXMLNode **ppsLink = &psParent->psChild;
    while (*ppsLink != nullptr)
    {
        CPLXMLNode *psCandidate = *ppsLink;
        if (psCandidate == psChild)
        {
            *ppsLink = psCandidate->psNext;
            // Detach the removed node from its former siblings.
            psCandidate->psNext = nullptr;
            return TRUE;
        }
        ppsLink = &psCandidate->psNext;
    }

    // psChild is not a direct child of psParent.
    return FALSE;
}

/************************************************************************/
/*                          CPLAddXMLSibling()                          */
/************************************************************************/

/**
 * \brief Add new sibling.
 *
 * The passed psNewSibling is added to the end of siblings of the
 * psOlderSibling node.  That is, it is added to the end of the psNext
 * chain.  There is no special handling if psNewSibling is an attribute.
 * If this is required, use CPLAddXMLChild().
 *
 * @param psOlderSibling the node to attach the sibling after.
 *
 * @param psNewSibling the node to add at the end of psOlderSibling's psNext
 * chain.
 */

void CPLAddXMLSibling(CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling)

{
    // Nothing to attach to.
    if (psOlderSibling == nullptr)
        return;

    // Advance to the last node of the psNext chain, then hook the new
    // sibling onto it.
    CPLXMLNode *psTail = psOlderSibling;
    while (CPLXMLNode *psFollowing = psTail->psNext)
        psTail = psFollowing;

    psTail->psNext = psNewSibling;
}

/************************************************************************/
/*                    CPLCreateXMLElementAndValue()                     */
/************************************************************************/

/**
 * \brief Create an element and text value.
 *
 * This function is a convenient short form for:
 *
 * \code
 *     CPLXMLNode *psTextNode;
 *     CPLXMLNode *psElementNode;
 *
 *     psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
 *     psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
 *
 *     return psElementNode;
 * \endcode
 *
 * It creates a CXT_Element node, with a CXT_Text child, and
 * attaches the element to the passed parent.
 *
 * @param psParent the parent node to which the resulting node should
 * be attached.  May be NULL to keep as freestanding.
 *
 * @param pszName the element name to create.
 * @param pszValue the text to attach to the element. Must not be NULL.
 *
 * @return the pointer to the new element node.
 */

CPLXMLNode *CPLCreateXMLElementAndValue(CPLXMLNode *psParent,
                                        const char *pszName,
                                        const char *pszValue)

{
    // The element is created first so that the text node can be created
    // with it as parent.
    CPLXMLNode *const psNewElement =
        CPLCreateXMLNode(psParent, CXT_Element, pszName);

    // The text node's own pointer is not needed by the caller.
    CPLCreateXMLNode(psNewElement, CXT_Text, pszValue);

    return psNewElement;
}

/************************************************************************/
/*                     CPLAddXMLAttributeAndValue()                     */
/************************************************************************/

/**
 * \brief Create an attribute and text value.
 *
 * This function is a convenient short form for:
 *
 * \code
 *   CPLXMLNode *psAttributeNode;
 *
 *   psAttributeNode = CPLCreateXMLNode( psParent, CXT_Attribute, pszName );
 *   CPLCreateXMLNode( psAttributeNode, CXT_Text, pszValue );
 * \endcode
 *
 * It creates a CXT_Attribute node, with a CXT_Text child, and
 * attaches the attribute to the passed parent.
 *
 * @param psParent the parent node to which the resulting node should
 * be attached.  Must not be NULL.
 * @param pszName the attribute name to create.
 * @param pszValue the text to attach to the attribute. Must not be NULL.
 *
 */

void CPLAddXMLAttributeAndValue(CPLXMLNode *psParent, const char *pszName,
                                const char *pszValue)
{
    CPLAssert(psParent != nullptr);

    // Create the attribute node under the parent, then create its text
    // value with the attribute as parent.
    CPLXMLNode *const psNewAttribute =
        CPLCreateXMLNode(psParent, CXT_Attribute, pszName);
    CPLCreateXMLNode(psNewAttribute, CXT_Text, pszValue);
}

/************************************************************************/
/*                          CPLCloneXMLTree()                           */
/************************************************************************/

/**
 * \brief Copy tree.
 *
 * Creates a deep copy of a CPLXMLNode tree.
 *
 * @param psTree the tree to duplicate.
 *
 * @return a copy of the whole tree.
 */

CPLXMLNode *CPLCloneXMLTree(const CPLXMLNode *psTree)

{
    CPLXMLNode *psHead = nullptr;

    // Slot that receives the next copied node: the head pointer at first,
    // then the psNext member of the most recently copied sibling.
    CPLXMLNode **ppsTail = &psHead;

    // Siblings are copied iteratively, children recursively.
    for (const CPLXMLNode *psSrc = psTree; psSrc != nullptr;
         psSrc = psSrc->psNext)
    {
        CPLXMLNode *psDup =
            CPLCreateXMLNode(nullptr, psSrc->eType, psSrc->pszValue);
        *ppsTail = psDup;

        if (psSrc->psChild != nullptr)
            psDup->psChild = CPLCloneXMLTree(psSrc->psChild);

        ppsTail = &psDup->psNext;
    }

    return psHead;
}

/************************************************************************/
/*                           CPLSetXMLValue()                           */
/************************************************************************/

/**
 * \brief Set element value by path.
 *
 * Find (or create) the target element or attribute specified in the
 * path, and assign it the indicated value.
 *
 * Any path elements that do not already exist will be created.  The target
 * node's value (the first CXT_Text child) will be replaced with the provided
 * value.
 *
 * If the target node is an attribute instead of an element, the name
 * should be prefixed with a #.
 *
 * Example:
 *   CPLSetXMLValue( psRoot, "Citation.Id.Description", "DOQ dataset" );
 *   CPLSetXMLValue( psRoot, "Citation.Id.Description.#name", "doq" );
 *
 * @param psRoot the subdocument to be updated.
 *
 * @param pszPath the dot separated path to the target element/attribute.
 *
 * @param pszValue the text value to assign.
 *
 * @return TRUE on success.
 */

int CPLSetXMLValue(CPLXMLNode *psRoot, const char *pszPath,
                   const char *pszValue)

{
    char **papszPathParts =
        CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);

    /* -------------------------------------------------------------------- */
    /*      Walk the path one component at a time, creating any node        */
    /*      that is missing.                                                */
    /* -------------------------------------------------------------------- */
    CPLXMLNode *psCurrent = psRoot;
    for (char **ppszPart = papszPathParts; *ppszPart != nullptr; ++ppszPart)
    {
        // Descending is only permitted through element nodes.
        if (psCurrent->eType != CXT_Element)
        {
            CSLDestroy(papszPathParts);
            return FALSE;
        }

        // A leading '#' marks the component as an attribute name; the '#'
        // itself is not part of the name.
        const bool bWantAttribute = (*ppszPart)[0] == '#';
        const char *pszNodeName = bWantAttribute ? *ppszPart + 1 : *ppszPart;

        // Look for an existing non-text child with a matching name.  Note
        // that the node type (element vs attribute) is not compared.
        CPLXMLNode *psMatch = psCurrent->psChild;
        while (psMatch != nullptr && (psMatch->eType == CXT_Text ||
                                      !EQUAL(pszNodeName, psMatch->pszValue)))
        {
            psMatch = psMatch->psNext;
        }

        if (psMatch == nullptr)
        {
            psMatch = CPLCreateXMLNode(
                psCurrent, bWantAttribute ? CXT_Attribute : CXT_Element,
                pszNodeName);
        }

        psCurrent = psMatch;
    }

    CSLDestroy(papszPathParts);

    /* -------------------------------------------------------------------- */
    /*      Locate the first text child of the target node, if any.         */
    /* -------------------------------------------------------------------- */
    CPLXMLNode *psText = psCurrent->psChild;
    for (; psText != nullptr; psText = psText->psNext)
    {
        if (psText->eType == CXT_Text)
            break;
    }

    /* -------------------------------------------------------------------- */
    /*      Replace the existing text, or add a new text node.              */
    /* -------------------------------------------------------------------- */
    if (psText != nullptr)
    {
        CPLFree(psText->pszValue);
        psText->pszValue = CPLStrdup(pszValue);
    }
    else
    {
        CPLCreateXMLNode(psCurrent, CXT_Text, pszValue);
    }

    return TRUE;
}

/************************************************************************/
/*                        CPLStripXMLNamespace()                        */
/************************************************************************/

/**
 * \brief Strip indicated namespaces.
 *
 * The subdocument (psRoot) is examined (recursively if bRecurse is TRUE),
 * and any elements or attributes with the indicated namespace prefix will
 * have the namespace prefix stripped from their names.  If the passed
 * namespace is NULL, then all namespace prefixes will be stripped.
 *
 * Nodes other than elements and attributes remain unaffected.  The changes
 * are made "in place", and should not alter any node locations, only the
 * pszValue field of affected nodes.
 *
 * @param psRoot the document to operate on.
 * @param pszNamespace the name space prefix (not including colon), or NULL.
 * @param bRecurse TRUE to recurse over whole document, or FALSE to only
 * operate on the passed node.
 */

void CPLStripXMLNamespace(CPLXMLNode *psRoot, const char *pszNamespace,
                          int bRecurse)

{
    const size_t nPrefixLen =
        pszNamespace != nullptr ? strlen(pszNamespace) : 0;

    for (CPLXMLNode *psNode = psRoot; psNode != nullptr;
         psNode = psNode->psNext)
    {
        if (psNode->eType == CXT_Element || psNode->eType == CXT_Attribute)
        {
            char *pszName = psNode->pszValue;

            // Start of the name with its prefix removed, or NULL when this
            // node's name is to be left alone.
            const char *pszLocalName = nullptr;

            if (pszNamespace == nullptr)
            {
                // No specific namespace requested: drop everything up to
                // and including the first colon, if there is one.
                const char *pszColon = strchr(pszName, ':');
                if (pszColon != nullptr)
                    pszLocalName = pszColon + 1;
            }
            else if (EQUALN(pszNamespace, pszName, nPrefixLen) &&
                     pszName[nPrefixLen] == ':')
            {
                pszLocalName = pszName + nPrefixLen + 1;
            }

            // Shift the remainder (including the terminating NUL) to the
            // start of the buffer; the regions overlap, hence memmove().
            if (pszLocalName != nullptr)
                memmove(pszName, pszLocalName, strlen(pszLocalName) + 1);
        }

        // Without recursion only the node passed in is processed.
        if (!bRecurse)
            break;

        if (psNode->psChild != nullptr)
            CPLStripXMLNamespace(psNode->psChild, pszNamespace, 1);
    }
}

/************************************************************************/
/*                          CPLParseXMLFile()                           */
/************************************************************************/

/**
 * \brief Parse XML file into tree.
 *
 * The named file is opened, loaded into memory as a big string, and
 * parsed with CPLParseXMLString().  Errors in reading the file or parsing
 * the XML will be reported by CPLError().
 *
 * The "large file" API is used, so XML files can come from virtualized
 * files.
 *
 * @param pszFilename the file to open.
 *
 * @return NULL on failure, or the document tree on success.
 */

CPLXMLNode *CPLParseXMLFile(const char *pszFilename)

{
    /* -------------------------------------------------------------------- */
    /*      Load the file content into a buffer.                            */
    /* -------------------------------------------------------------------- */
    GByte *pabyContent = nullptr;
    const int bIngested =
        VSIIngestFile(nullptr, pszFilename, &pabyContent, nullptr, -1);
    if (!bIngested)
        return nullptr;

    /* -------------------------------------------------------------------- */
    /*      Parse the buffer as a string, then release it.                  */
    /* -------------------------------------------------------------------- */
    CPLXMLNode *psDocument =
        CPLParseXMLString(reinterpret_cast<const char *>(pabyContent));
    CPLFree(pabyContent);

    return psDocument;
}

/************************************************************************/
/*                     CPLSerializeXMLTreeToFile()                      */
/************************************************************************/

/**
 * \brief Write document tree to a file.
 *
 * The passed document tree is converted into one big string (with
 * CPLSerializeXMLTree()) and then written to the named file.  Errors writing
 * the file will be reported by CPLError().  The source document tree is
 * not altered.  If the output file already exists it will be overwritten.
 *
 * @param psTree the document tree to write.
 * @param pszFilename the name of the file to write to.
 * @return TRUE on success, FALSE otherwise.
 */

int CPLSerializeXMLTreeToFile(const CPLXMLNode *psTree, const char *pszFilename)

{
    /* -------------------------------------------------------------------- */
    /*      Serialize document.                                             */
    /* -------------------------------------------------------------------- */
    char *pszDoc = CPLSerializeXMLTree(psTree);
    if (pszDoc == nullptr)
        return FALSE;

    const vsi_l_offset nLength = strlen(pszDoc);

    /* -------------------------------------------------------------------- */
    /*      Create file.                                                    */
    /* -------------------------------------------------------------------- */
    VSILFILE *fp = VSIFOpenL(pszFilename, "wt");
    if (fp == nullptr)
    {
        CPLError(CE_Failure, CPLE_OpenFailed, "Failed to open %.500s to write.",
                 pszFilename);
        CPLFree(pszDoc);
        return FALSE;
    }

    /* -------------------------------------------------------------------- */
    /*      Write file.                                                     */
    /* -------------------------------------------------------------------- */
    if (VSIFWriteL(pszDoc, 1, static_cast<size_t>(nLength), fp) != nLength)
    {
        CPLError(CE_Failure, CPLE_FileIO,
                 "Failed to write whole XML document (%.500s).", pszFilename);
        CPL_IGNORE_RET_VAL(VSIFCloseL(fp));
        CPLFree(pszDoc);
        return FALSE;
    }

    /* -------------------------------------------------------------------- */
    /*      Cleanup                                                         */
    /* -------------------------------------------------------------------- */
    const bool bRet = VSIFCloseL(fp) == 0;
    if (!bRet)
    {
        CPLError(CE_Failure, CPLE_FileIO,
                 "Failed to write whole XML document (%.500s).", pszFilename);
    }
    CPLFree(pszDoc);

    return bRet;
}

/************************************************************************/
/*                       CPLCleanXMLElementName()                       */
/************************************************************************/

/**
 * \brief Make string into safe XML token.
 *
 * Modifies a string in place to try and make it into a legal
 * XML token that can be used as an element name.   This is accomplished
 * by changing any characters not legal in a token into an underscore.
 *
 * NOTE: This function should implement the rules in section 2.3 of
 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly.  We
 * only do a rough approximation of that.
 *
 * @param pszTarget the string to be adjusted.  It is altered in place.
 */

void CPLCleanXMLElementName(char *pszTarget)
{
    if (pszTarget == nullptr)
        return;

    char *pszCur = pszTarget;
    while (*pszCur != '\0')
    {
        const unsigned char ch = static_cast<unsigned char>(*pszCur);

        // Kept as-is: bytes with the high bit set, ASCII letters and digits
        // (as classified by isalnum()), underscore and dot.
        const bool bKeep =
            (ch & 0x80) != 0 || isalnum(ch) || ch == '_' || ch == '.';

        // Everything else is replaced by an underscore.
        if (!bKeep)
            *pszCur = '_';

        ++pszCur;
    }
}

/************************************************************************/
/*                     CPLXMLNodeGetRAMUsageEstimate()                  */
/************************************************************************/

static size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode,
                                            bool bVisitSiblings)
{
    size_t nRet = sizeof(CPLXMLNode);
    // malloc() aligns on 16-byte boundaries on 64 bit.
    nRet += std::max(2 * sizeof(void *), strlen(psNode->pszValue) + 1);
    if (bVisitSiblings)
    {
        for (const CPLXMLNode *psIter = psNode->psNext; psIter;
             psIter = psIter->psNext)
        {
            nRet += CPLXMLNodeGetRAMUsageEstimate(psIter, false);
        }
    }
    if (psNode->psChild)
    {
        nRet += CPLXMLNodeGetRAMUsageEstimate(psNode->psChild, true);
    }
    return nRet;
}

/** Return a conservative estimate of the RAM usage of this node, its children
 * and the siblings that follow it. The returned value is in bytes.
 *
 * @param psNode the node to measure.
 * @return the estimated size in bytes.
 *
 * @since 3.9
 */
size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode)
{
    // Public entry point: always include the psNext chain of psNode.
    constexpr bool bIncludeSiblings = true;
    return CPLXMLNodeGetRAMUsageEstimate(psNode, bIncludeSiblings);
}

/************************************************************************/
/*            CPLXMLTreeCloser::getDocumentElement()                    */
/************************************************************************/

CPLXMLNode *CPLXMLTreeCloser::getDocumentElement()
{
    // Scan the top-level nodes, skipping non-element nodes and elements
    // whose name starts with '?' (such as the declaration).  The first
    // remaining element is assumed to be the root element.
    for (CPLXMLNode *psNode = get(); psNode != nullptr;
         psNode = psNode->psNext)
    {
        if (psNode->eType == CXT_Element && psNode->pszValue[0] != '?')
            return psNode;
    }

    // No suitable element found (or the tree is empty).
    return nullptr;
}
