https://github.com/python/cpython/commit/bb0268f60dfe903a9bdb8d84104247a9318c6b18
commit: bb0268f60dfe903a9bdb8d84104247a9318c6b18
branch: main
author: Gregory P. Smith <[email protected]>
committer: encukou <[email protected]>
date: 2025-03-17T14:55:02+01:00
summary:
gh-131261: Update libexpat to 2.7.0 (CVE-2024-8176) (#131272)
files:
A Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst
M Misc/sbom.spdx.json
M Modules/expat/expat.h
M Modules/expat/refresh.sh
M Modules/expat/xmlparse.c
M Tools/build/.warningignore_macos
diff --git
a/Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst
b/Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst
new file mode 100644
index 00000000000000..fa56e7a452bc2a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst
@@ -0,0 +1 @@
+Upgrade to libexpat 2.7.0
diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json
index 2bfe2a33fd1b1b..cac346f8c38eaf 100644
--- a/Misc/sbom.spdx.json
+++ b/Misc/sbom.spdx.json
@@ -48,11 +48,11 @@
"checksums": [
{
"algorithm": "SHA1",
- "checksumValue": "373cc00d87782a736970644d863ff2ebbd0e4886"
+ "checksumValue": "1128a2c7acb539a7b8613aac182983c48e8d6eb9"
},
{
"algorithm": "SHA256",
- "checksumValue":
"0f750bc336e510d14ac9a3e63fc2399f60f3f04f0061c426e86751ed5fba90e4"
+ "checksumValue":
"3f868d5366b7736096c6a53f1422cf2dca3795d04b5438fc1b54279effdab918"
}
],
"fileName": "Modules/expat/expat.h"
@@ -132,11 +132,11 @@
"checksums": [
{
"algorithm": "SHA1",
- "checksumValue": "a9b0a33b8359cfe94b23972a1605daf8dcc605d9"
+ "checksumValue": "1834a0629524eee116cf84251464fb368423fd73"
},
{
"algorithm": "SHA256",
- "checksumValue":
"19eb541460bc2ca8b87118acd3c048f6af77affbf8719ac29aa7b6c8d70f83fd"
+ "checksumValue":
"a8e123d64c0c43fcc52e70c6573db071c3d007ecdad604fbc3a84beefe2ed3a1"
}
],
"fileName": "Modules/expat/refresh.sh"
@@ -188,11 +188,11 @@
"checksums": [
{
"algorithm": "SHA1",
- "checksumValue": "3199fbd38b6fb158f73d5c8de6b6e6e3812ef803"
+ "checksumValue": "2170362ee95a94d5c489f9430c8f63a89b7fd764"
},
{
"algorithm": "SHA256",
- "checksumValue":
"c1518244dd5ea397e345d00e12cc45d42f43453ed208218559c981c97a0583e2"
+ "checksumValue":
"9da1080e3fe34cedaed53d3916978d07404dae55b3e3c2e8316abd5f13af8074"
}
],
"fileName": "Modules/expat/xmlparse.c"
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
index 523b37d8d5787d..192cfd3f07eb7e 100644
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -11,7 +11,7 @@
Copyright (c) 2000-2005 Fred L. Drake, Jr. <[email protected]>
Copyright (c) 2001-2002 Greg Stein <[email protected]>
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
- Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
+ Copyright (c) 2016-2025 Sebastian Pipping <[email protected]>
Copyright (c) 2016 Cristian Rodríguez <[email protected]>
Copyright (c) 2016 Thomas Beutlich <[email protected]>
Copyright (c) 2017 Rhodri James <[email protected]>
@@ -1067,8 +1067,8 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool
enabled);
See https://semver.org
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 6
-#define XML_MICRO_VERSION 4
+#define XML_MINOR_VERSION 7
+#define XML_MICRO_VERSION 0
#ifdef __cplusplus
}
diff --git a/Modules/expat/refresh.sh b/Modules/expat/refresh.sh
index 82a9dbc23ad26b..ca1c1bdfebfefd 100755
--- a/Modules/expat/refresh.sh
+++ b/Modules/expat/refresh.sh
@@ -12,9 +12,9 @@ fi
# Update this when updating to a new version after verifying that the changes
# the update brings in are good. These values are used for verifying the SBOM,
too.
-expected_libexpat_tag="R_2_6_4"
-expected_libexpat_version="2.6.4"
-expected_libexpat_sha256="fd03b7172b3bd7427a3e7a812063f74754f24542429b634e0db6511b53fb2278"
+expected_libexpat_tag="R_2_7_0"
+expected_libexpat_version="2.7.0"
+expected_libexpat_sha256="362e89ca6b8a0d46fc5740a917eb2a8b4d6356edbe016eee09f49c0781215844"
expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")"
cd ${expat_dir}
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
index a4e091e7c33c0a..376470dd80632b 100644
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -1,4 +1,4 @@
-/* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+)
+/* 7d6840a33c250b74adb0ba295d6ec818dccebebaffc8c3ed27d0b29c28adbeb3 (2.7.0+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -13,7 +13,7 @@
Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
Copyright (c) 2005-2009 Steven Solie <[email protected]>
Copyright (c) 2016 Eric Rahm <[email protected]>
- Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
+ Copyright (c) 2016-2025 Sebastian Pipping <[email protected]>
Copyright (c) 2016 Gaurav <[email protected]>
Copyright (c) 2016 Thomas Beutlich <[email protected]>
Copyright (c) 2016 Gustavo Grieco <[email protected]>
@@ -39,7 +39,7 @@
Copyright (c) 2022 Sean McBride <[email protected]>
Copyright (c) 2023 Owain Davies <[email protected]>
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <[email protected]>
- Copyright (c) 2024 Berkay Eren Ürün <[email protected]>
+ Copyright (c) 2024-2025 Berkay Eren Ürün <[email protected]>
Copyright (c) 2024 Hanno Böck <[email protected]>
Licensed under the MIT license:
@@ -325,6 +325,10 @@ typedef struct {
const XML_Char *publicId;
const XML_Char *notation;
XML_Bool open;
+ XML_Bool hasMore; /* true if entity has not been completely processed */
+ /* An entity can be open while being already completely processed (hasMore ==
+ XML_FALSE). The reason is the delayed closing of entities until their inner
+ entities are processed and closed */
XML_Bool is_param;
XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
@@ -415,6 +419,12 @@ typedef struct {
int *scaffIndex;
} DTD;
+enum EntityType {
+ ENTITY_INTERNAL,
+ ENTITY_ATTRIBUTE,
+ ENTITY_VALUE,
+};
+
typedef struct open_internal_entity {
const char *internalEventPtr;
const char *internalEventEndPtr;
@@ -422,6 +432,7 @@ typedef struct open_internal_entity {
ENTITY *entity;
int startTagLevel;
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
+ enum EntityType type;
} OPEN_INTERNAL_ENTITY;
enum XML_Account {
@@ -481,8 +492,8 @@ static enum XML_Error doProlog(XML_Parser parser, const
ENCODING *enc,
const char *next, const char **nextPtr,
XML_Bool haveMore, XML_Bool allowClosingDoctype,
enum XML_Account account);
-static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
- XML_Bool betweenDecl);
+static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
+ XML_Bool betweenDecl, enum EntityType
type);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
@@ -513,18 +524,22 @@ static enum XML_Error storeAttributeValue(XML_Parser
parser,
const char *ptr, const char *end,
STRING_POOL *pool,
enum XML_Account account);
-static enum XML_Error appendAttributeValue(XML_Parser parser,
- const ENCODING *enc,
- XML_Bool isCdata, const char *ptr,
- const char *end, STRING_POOL *pool,
- enum XML_Account account);
+static enum XML_Error
+appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account, const char **nextPtr);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
#if XML_GE == 1
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end,
- enum XML_Account account);
+ enum XML_Account account,
+ const char **nextPtr);
+static enum XML_Error callStoreEntityValue(XML_Parser parser,
+ const ENCODING *enc,
+ const char *start, const char *end,
+ enum XML_Account account);
#else
static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
#endif
@@ -709,6 +724,10 @@ struct XML_ParserStruct {
const char *m_positionPtr;
OPEN_INTERNAL_ENTITY *m_openInternalEntities;
OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
+ OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_openValueEntities;
+ OPEN_INTERNAL_ENTITY *m_freeValueEntities;
XML_Bool m_defaultExpandInternalEntities;
int m_tagLevel;
ENTITY *m_declEntity;
@@ -756,6 +775,7 @@ struct XML_ParserStruct {
ACCOUNTING m_accounting;
ENTITY_STATS m_entity_stats;
#endif
+ XML_Bool m_reenter;
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -1028,7 +1048,29 @@ callProcessor(XML_Parser parser, const char *start,
const char *end,
#if defined(XML_TESTING)
g_bytesScanned += (unsigned)have_now;
#endif
- const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ // Run in a loop to eliminate dangerous recursion depths
+ enum XML_Error ret;
+ *endPtr = start;
+ while (1) {
+ // Use endPtr as the new start in each iteration, since it will
+ // be set to the next start point by m_processor.
+ ret = parser->m_processor(parser, *endPtr, end, endPtr);
+
+ // Make parsing status (and in particular XML_SUSPENDED) take
+ // precedence over re-enter flag when they disagree
+ if (parser->m_parsingStatus.parsing != XML_PARSING) {
+ parser->m_reenter = XML_FALSE;
+ }
+
+ if (! parser->m_reenter) {
+ break;
+ }
+
+ parser->m_reenter = XML_FALSE;
+ if (ret != XML_ERROR_NONE)
+ return ret;
+ }
+
if (ret == XML_ERROR_NONE) {
// if we consumed nothing, remember what we had on this parse attempt.
if (*endPtr == start) {
@@ -1139,6 +1181,8 @@ parserCreate(const XML_Char *encodingName,
parser->m_freeBindingList = NULL;
parser->m_freeTagList = NULL;
parser->m_freeInternalEntities = NULL;
+ parser->m_freeAttributeEntities = NULL;
+ parser->m_freeValueEntities = NULL;
parser->m_groupSize = 0;
parser->m_groupConnector = NULL;
@@ -1241,6 +1285,8 @@ parserInit(XML_Parser parser, const XML_Char
*encodingName) {
parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
parser->m_openInternalEntities = NULL;
+ parser->m_openAttributeEntities = NULL;
+ parser->m_openValueEntities = NULL;
parser->m_defaultExpandInternalEntities = XML_TRUE;
parser->m_tagLevel = 0;
parser->m_tagStack = NULL;
@@ -1251,6 +1297,8 @@ parserInit(XML_Parser parser, const XML_Char
*encodingName) {
parser->m_unknownEncodingData = NULL;
parser->m_parentParser = NULL;
parser->m_parsingStatus.parsing = XML_INITIALIZED;
+ // Reentry can only be triggered inside m_processor calls
+ parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
parser->m_isParamEntity = XML_FALSE;
parser->m_useForeignDTD = XML_FALSE;
@@ -1310,6 +1358,24 @@ XML_ParserReset(XML_Parser parser, const XML_Char
*encodingName) {
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
}
+ /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task
but
+ * for attributes) */
+ openEntityList = parser->m_openAttributeEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+ /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
+ * for value entities) */
+ openEntityList = parser->m_openValueEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
moveToFreeBindingList(parser, parser->m_inheritedBindings);
FREE(parser, parser->m_unknownEncodingMem);
if (parser->m_unknownEncodingRelease)
@@ -1323,6 +1389,19 @@ XML_ParserReset(XML_Parser parser, const XML_Char
*encodingName) {
return XML_TRUE;
}
+static XML_Bool
+parserBusy(XML_Parser parser) {
+ switch (parser->m_parsingStatus.parsing) {
+ case XML_PARSING:
+ case XML_SUSPENDED:
+ return XML_TRUE;
+ case XML_INITIALIZED:
+ case XML_FINISHED:
+ default:
+ return XML_FALSE;
+ }
+}
+
enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
if (parser == NULL)
@@ -1331,8 +1410,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char
*encodingName) {
XXX There's no way for the caller to determine which of the
XXX possible error cases caused the XML_STATUS_ERROR return.
*/
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_STATUS_ERROR;
/* Get rid of any previous encoding name */
@@ -1569,7 +1647,34 @@ XML_ParserFree(XML_Parser parser) {
entityList = entityList->next;
FREE(parser, openEntity);
}
-
+ /* free m_openAttributeEntities and m_freeAttributeEntities */
+ entityList = parser->m_openAttributeEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeAttributeEntities == NULL)
+ break;
+ entityList = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
+ /* free m_openValueEntities and m_freeValueEntities */
+ entityList = parser->m_openValueEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeValueEntities == NULL)
+ break;
+ entityList = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
destroyBindings(parser->m_freeBindingList, parser);
destroyBindings(parser->m_inheritedBindings, parser);
poolDestroy(&parser->m_tempPool);
@@ -1611,8 +1716,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
return XML_ERROR_INVALID_ARGUMENT;
#ifdef XML_DTD
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
parser->m_useForeignDTD = useDTD;
return XML_ERROR_NONE;
@@ -1627,8 +1731,7 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
if (parser == NULL)
return;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return;
parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
}
@@ -1897,8 +2000,7 @@ XML_SetParamEntityParsing(XML_Parser parser,
if (parser == NULL)
return 0;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
#ifdef XML_DTD
parser->m_paramEntityParsing = peParsing;
@@ -1915,8 +2017,7 @@ XML_SetHashSalt(XML_Parser parser, unsigned long
hash_salt) {
if (parser->m_parentParser)
return XML_SetHashSalt(parser->m_parentParser, hash_salt);
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
parser->m_hash_secret_salt = hash_salt;
return 1;
@@ -2230,6 +2331,11 @@ XML_GetBuffer(XML_Parser parser, int len) {
return parser->m_bufferEnd;
}
+static void
+triggerReenter(XML_Parser parser) {
+ parser->m_reenter = XML_TRUE;
+}
+
enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
if (parser == NULL)
@@ -2704,8 +2810,9 @@ static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
enum XML_Error result = doContent(
- parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
+ parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
+ endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2793,6 +2900,11 @@ externalEntityInitProcessor3(XML_Parser parser, const
char *start,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:
start = next;
}
@@ -2966,7 +3078,7 @@ doContent(XML_Parser parser, int startTagLevel, const
ENCODING *enc,
reportDefault(parser, enc, s, next);
break;
}
- result = processInternalEntity(parser, entity, XML_FALSE);
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
} else if (parser->m_externalEntityRefHandler) {
@@ -3092,7 +3204,9 @@ doContent(XML_Parser parser, int startTagLevel, const
ENCODING *enc,
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3153,7 +3267,9 @@ doContent(XML_Parser parser, int startTagLevel, const
ENCODING *enc,
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3293,6 +3409,12 @@ doContent(XML_Parser parser, int startTagLevel, const
ENCODING *enc,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:;
}
}
@@ -4217,6 +4339,11 @@ doCdataSection(XML_Parser parser, const ENCODING *enc,
const char **startPtr,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
}
}
@@ -4549,7 +4676,7 @@ entityValueInitProcessor(XML_Parser parser, const char
*s, const char *end,
}
/* found end of entity value - can store it now */
return storeEntityValue(parser, parser->m_encoding, s, end,
- XML_ACCOUNT_DIRECT);
+ XML_ACCOUNT_DIRECT, NULL);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -4676,7 +4803,7 @@ entityValueProcessor(XML_Parser parser, const char *s,
const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
}
start = next;
}
@@ -5119,9 +5246,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const
char *s, const char *end,
#if XML_GE == 1
// This will store the given replacement text in
// parser->m_declEntity->textPtr.
- enum XML_Error result
- = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
- next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
+ enum XML_Error result = callStoreEntityValue(
+ parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
+ XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -5546,7 +5673,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const
char *s, const char *end,
enum XML_Error result;
XML_Bool betweenDecl
= (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
- result = processInternalEntity(parser, entity, betweenDecl);
+ result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
handleDefault = XML_FALSE;
@@ -5751,6 +5878,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const
char *s, const char *end,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:
s = next;
tok = XmlPrologTok(enc, s, end, &next);
@@ -5825,21 +5958,49 @@ epilogProcessor(XML_Parser parser, const char *s, const
char *end,
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
}
}
}
static enum XML_Error
-processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl)
{
- const char *textStart, *textEnd;
- const char *next;
- enum XML_Error result;
- OPEN_INTERNAL_ENTITY *openEntity;
+processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
+ enum EntityType type) {
+ OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
+ switch (type) {
+ case ENTITY_INTERNAL:
+ parser->m_processor = internalEntityProcessor;
+ openEntityList = &parser->m_openInternalEntities;
+ freeEntityList = &parser->m_freeInternalEntities;
+ break;
+ case ENTITY_ATTRIBUTE:
+ openEntityList = &parser->m_openAttributeEntities;
+ freeEntityList = &parser->m_freeAttributeEntities;
+ break;
+ case ENTITY_VALUE:
+ openEntityList = &parser->m_openValueEntities;
+ freeEntityList = &parser->m_freeValueEntities;
+ break;
+ /* default case serves merely as a safety net in case of a
+ * wrong entityType. Therefore we exclude the following lines
+ * from the test coverage.
+ *
+ * LCOV_EXCL_START
+ */
+ default:
+ // Should not reach here
+ assert(0);
+ /* LCOV_EXCL_STOP */
+ }
- if (parser->m_freeInternalEntities) {
- openEntity = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity->next;
+ if (*freeEntityList) {
+ openEntity = *freeEntityList;
+ *freeEntityList = openEntity->next;
} else {
openEntity
= (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
@@ -5847,55 +6008,34 @@ processInternalEntity(XML_Parser parser, ENTITY
*entity, XML_Bool betweenDecl) {
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+ entity->hasMore = XML_TRUE;
#if XML_GE == 1
entityTrackingOnOpen(parser, entity, __LINE__);
#endif
entity->processed = 0;
- openEntity->next = parser->m_openInternalEntities;
- parser->m_openInternalEntities = openEntity;
+ openEntity->next = *openEntityList;
+ *openEntityList = openEntity;
openEntity->entity = entity;
+ openEntity->type = type;
openEntity->startTagLevel = parser->m_tagLevel;
openEntity->betweenDecl = betweenDecl;
openEntity->internalEventPtr = NULL;
openEntity->internalEventEndPtr = NULL;
- textStart = (const char *)entity->textPtr;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else {
- result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- }
- if (result == XML_ERROR_NONE) {
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - textStart);
- parser->m_processor = internalEntityProcessor;
- } else if (parser->m_openInternalEntities->entity == entity) {
-#if XML_GE == 1
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif /* XML_GE == 1 */
- entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
- /* put openEntity back in list of free instances */
- openEntity->next = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity;
- }
+ // Only internal entities make use of the reenter flag
+ // therefore no need to set it for other entity types
+ if (type == ENTITY_INTERNAL) {
+ triggerReenter(parser);
}
- return result;
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
const char **nextPtr) {
+ UNUSED_P(s);
+ UNUSED_P(end);
+ UNUSED_P(nextPtr);
ENTITY *entity;
const char *textStart, *textEnd;
const char *next;
@@ -5905,68 +6045,67 @@ internalEntityProcessor(XML_Parser parser, const char
*s, const char *end,
return XML_ERROR_UNEXPECTED_STATE;
entity = openEntity->entity;
- textStart = ((const char *)entity->textPtr) + entity->processed;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else {
- result = doContent(parser, openEntity->startTagLevel,
- parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
- }
- if (result != XML_ERROR_NONE)
- return result;
+ // This will return early
+ if (entity->hasMore) {
+ textStart = ((const char *)entity->textPtr) + entity->processed;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ next = textStart;
+
+ if (entity->is_param) {
+ int tok
+ = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd,
&next);
+ result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ } else {
+ result = doContent(parser, openEntity->startTagLevel,
+ parser->m_internalEncoding, textStart, textEnd, &next,
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
+ }
+
+ if (result != XML_ERROR_NONE)
+ return result;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed
+ if (textEnd != next
+ && (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))) {
+ entity->processed = (int)(next - (const char *)entity->textPtr);
+ return result;
+ }
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - (const char *)entity->textPtr);
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openInternalEntities during doProlog or doContent calls above)
+ entity->hasMore = XML_FALSE;
+ triggerReenter(parser);
return result;
- }
+ } // End of entity processing, "if" block will return here
+ // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
entityTrackingOnClose(parser, entity, __LINE__);
#endif
+ // openEntity is m_openInternalEntities' head, as we set it at the start of
+ // this function and we skipped doProlog and doContent calls with hasMore set
+ // to false. This means we can directly remove the head of
+ // m_openInternalEntities
+ assert(parser->m_openInternalEntities == openEntity);
entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
+ parser->m_openInternalEntities = parser->m_openInternalEntities->next;
+
/* put openEntity back in list of free instances */
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
- // If there are more open entities we want to stop right here and have the
- // upcoming call to XML_ResumeParser continue with entity content, or it
would
- // be ignored altogether.
- if (parser->m_openInternalEntities != NULL
- && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- return XML_ERROR_NONE;
- }
-
- if (entity->is_param) {
- int tok;
- parser->m_processor = prologProcessor;
- tok = XmlPrologTok(parser->m_encoding, s, end, &next);
- return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
- XML_ACCOUNT_DIRECT);
- } else {
- parser->m_processor = contentProcessor;
- /* see externalEntityContentProcessor vs contentProcessor */
- result = doContent(parser, parser->m_parentParser ? 1 : 0,
- parser->m_encoding, s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer,
- XML_ACCOUNT_DIRECT);
- if (result == XML_ERROR_NONE) {
- if (! storeRawNames(parser))
- return XML_ERROR_NO_MEMORY;
- }
- return result;
+ if (parser->m_openInternalEntities == NULL) {
+ parser->m_processor = entity->is_param ? prologProcessor :
contentProcessor;
}
+ triggerReenter(parser);
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
@@ -5982,8 +6121,70 @@ static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
enum XML_Account account) {
- enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
+ const char *next = ptr;
+ enum XML_Error result = XML_ERROR_NONE;
+
+ while (1) {
+ if (! parser->m_openAttributeEntities) {
+ result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
+ account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = appendAttributeValue(
+ parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
+ pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. A XML_SUSPENDED check here is not required as
+ // appendAttributeValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openAttributeEntities during appendAttributeValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+#if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
+ // openEntity is m_openAttributeEntities' head, since we set it at the
+ // start of this function and because we skipped appendAttributeValue
call
+ // with hasMore set to false. This means we can directly remove the head
+ // of m_openAttributeEntities
+ assert(parser->m_openAttributeEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
+ break;
+ }
+ }
+
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5996,7 +6197,7 @@ storeAttributeValue(XML_Parser parser, const ENCODING
*enc, XML_Bool isCdata,
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
UNUSED_P(account);
@@ -6014,6 +6215,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING
*enc, XML_Bool isCdata,
#endif
switch (tok) {
case XML_TOK_NONE:
+ if (nextPtr) {
+ *nextPtr = next;
+ }
return XML_ERROR_NONE;
case XML_TOK_INVALID:
if (enc == parser->m_encoding)
@@ -6154,21 +6358,11 @@ appendAttributeValue(XML_Parser parser, const ENCODING
*enc, XML_Bool isCdata,
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
} else {
enum XML_Error result;
- const XML_Char *textEnd = entity->textPtr + entity->textLen;
- entity->open = XML_TRUE;
-#if XML_GE == 1
- entityTrackingOnOpen(parser, entity, __LINE__);
-#endif
- result = appendAttributeValue(parser, parser->m_internalEncoding,
- isCdata, (const char *)entity->textPtr,
- (const char *)textEnd, pool,
- XML_ACCOUNT_ENTITY_EXPANSION);
-#if XML_GE == 1
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif
- entity->open = XML_FALSE;
- if (result)
- return result;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
+ if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
+ *nextPtr = next;
+ }
+ return result;
}
} break;
default:
@@ -6197,7 +6391,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING
*enc, XML_Bool isCdata,
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *entityTextPtr, const char *entityTextEnd,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
@@ -6215,8 +6409,9 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
return XML_ERROR_NO_MEMORY;
}
+ const char *next;
for (;;) {
- const char *next
+ next
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg
*/
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
@@ -6278,16 +6473,8 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
} else
dtd->keepProcessing = dtd->standalone;
} else {
- entity->open = XML_TRUE;
- entityTrackingOnOpen(parser, entity, __LINE__);
- result = storeEntityValue(
- parser, parser->m_internalEncoding, (const char
*)entity->textPtr,
- (const char *)(entity->textPtr + entity->textLen),
- XML_ACCOUNT_ENTITY_EXPANSION);
- entityTrackingOnClose(parser, entity, __LINE__);
- entity->open = XML_FALSE;
- if (result)
- goto endEntityValue;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
+ goto endEntityValue;
}
break;
}
@@ -6375,6 +6562,81 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
# ifdef XML_DTD
parser->m_prologState.inEntityValue = oldInEntityValue;
# endif /* XML_DTD */
+ // If 'nextPtr' is given, it should be updated during the processing
+ if (nextPtr != NULL) {
+ *nextPtr = next;
+ }
+ return result;
+}
+
+static enum XML_Error
+callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
+ const char *next = entityTextPtr;
+ enum XML_Error result = XML_ERROR_NONE;
+ while (1) {
+ if (! parser->m_openValueEntities) {
+ result
+ = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = storeEntityValue(parser, parser->m_internalEncoding,
textStart,
+ textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
+ &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. A XML_SUSPENDED check here is not required as
+ // appendAttributeValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openValueEntities during storeEntityValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+# if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+# endif
+ // openEntity is m_openValueEntities' head, since we set it at the
+ // start of this function and because we skipped storeEntityValue call
+ // with hasMore set to false. This means we can directly remove the head
+ // of m_openValueEntities
+ assert(parser->m_openValueEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openValueEntities = parser->m_openValueEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result
+ || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
+ break;
+ }
+ }
+
return result;
}
@@ -8542,11 +8804,13 @@ unsignedCharToPrintable(unsigned char c) {
return "\\xFE";
case 255:
return "\\xFF";
+ // LCOV_EXCL_START
default:
assert(0); /* never gets here */
return "dead code";
}
assert(0); /* never gets here */
+ // LCOV_EXCL_STOP
}
#endif /* XML_GE == 1 */
diff --git a/Tools/build/.warningignore_macos b/Tools/build/.warningignore_macos
index e72309229cc60f..d7b62bc6a43ba6 100644
--- a/Tools/build/.warningignore_macos
+++ b/Tools/build/.warningignore_macos
@@ -4,6 +4,6 @@
# Format example:
# /path/to/file (number of warnings in file)
Modules/expat/siphash.h 7
-Modules/expat/xmlparse.c 8
+Modules/expat/xmlparse.c 13
Modules/expat/xmltok.c 3
Modules/expat/xmltok_impl.c 26
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]