https://github.com/python/cpython/commit/bb0268f60dfe903a9bdb8d84104247a9318c6b18 commit: bb0268f60dfe903a9bdb8d84104247a9318c6b18 branch: main author: Gregory P. Smith <g...@krypto.org> committer: encukou <encu...@gmail.com> date: 2025-03-17T14:55:02+01:00 summary:
gh-131261: Update libexpat to 2.7.0 (CVE-2024-8176) (#131272) files: A Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst M Misc/sbom.spdx.json M Modules/expat/expat.h M Modules/expat/refresh.sh M Modules/expat/xmlparse.c M Tools/build/.warningignore_macos diff --git a/Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst b/Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst new file mode 100644 index 00000000000000..fa56e7a452bc2a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-03-14-23-28-39.gh-issue-131261.0aB6nM.rst @@ -0,0 +1 @@ +Upgrade to libexpat 2.7.0 diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 2bfe2a33fd1b1b..cac346f8c38eaf 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -48,11 +48,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "373cc00d87782a736970644d863ff2ebbd0e4886" + "checksumValue": "1128a2c7acb539a7b8613aac182983c48e8d6eb9" }, { "algorithm": "SHA256", - "checksumValue": "0f750bc336e510d14ac9a3e63fc2399f60f3f04f0061c426e86751ed5fba90e4" + "checksumValue": "3f868d5366b7736096c6a53f1422cf2dca3795d04b5438fc1b54279effdab918" } ], "fileName": "Modules/expat/expat.h" @@ -132,11 +132,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "a9b0a33b8359cfe94b23972a1605daf8dcc605d9" + "checksumValue": "1834a0629524eee116cf84251464fb368423fd73" }, { "algorithm": "SHA256", - "checksumValue": "19eb541460bc2ca8b87118acd3c048f6af77affbf8719ac29aa7b6c8d70f83fd" + "checksumValue": "a8e123d64c0c43fcc52e70c6573db071c3d007ecdad604fbc3a84beefe2ed3a1" } ], "fileName": "Modules/expat/refresh.sh" @@ -188,11 +188,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "3199fbd38b6fb158f73d5c8de6b6e6e3812ef803" + "checksumValue": "2170362ee95a94d5c489f9430c8f63a89b7fd764" }, { "algorithm": "SHA256", - "checksumValue": "c1518244dd5ea397e345d00e12cc45d42f43453ed208218559c981c97a0583e2" + "checksumValue": "9da1080e3fe34cedaed53d3916978d07404dae55b3e3c2e8316abd5f13af8074" } ], "fileName": "Modules/expat/xmlparse.c" diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 523b37d8d5787d..192cfd3f07eb7e 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -11,7 +11,7 @@ Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdr...@users.sourceforge.net> Copyright (c) 2001-2002 Greg Stein <gst...@users.sourceforge.net> Copyright (c) 2002-2016 Karl Waclawek <k...@waclawek.net> - Copyright (c) 2016-2024 Sebastian Pipping <sebast...@pipping.org> + Copyright (c) 2016-2025 Sebastian Pipping <sebast...@pipping.org> Copyright (c) 2016 Cristian Rodríguez <crrodrig...@opensuse.org> Copyright (c) 2016 Thomas Beutlich <t...@tbeu.de> Copyright (c) 2017 Rhodri James <rho...@wildebeest.org.uk> @@ -1067,8 +1067,8 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); See https://semver.org */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 6 -#define XML_MICRO_VERSION 4 +#define XML_MINOR_VERSION 7 +#define XML_MICRO_VERSION 0 #ifdef __cplusplus } diff --git a/Modules/expat/refresh.sh b/Modules/expat/refresh.sh index 82a9dbc23ad26b..ca1c1bdfebfefd 100755 --- a/Modules/expat/refresh.sh +++ b/Modules/expat/refresh.sh @@ -12,9 +12,9 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. These values are used for verifying the SBOM, too. -expected_libexpat_tag="R_2_6_4" -expected_libexpat_version="2.6.4" -expected_libexpat_sha256="fd03b7172b3bd7427a3e7a812063f74754f24542429b634e0db6511b53fb2278" +expected_libexpat_tag="R_2_7_0" +expected_libexpat_version="2.7.0" +expected_libexpat_sha256="362e89ca6b8a0d46fc5740a917eb2a8b4d6356edbe016eee09f49c0781215844" expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")" cd ${expat_dir} diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index a4e091e7c33c0a..376470dd80632b 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+) +/* 7d6840a33c250b74adb0ba295d6ec818dccebebaffc8c3ed27d0b29c28adbeb3 (2.7.0+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -13,7 +13,7 @@ Copyright (c) 2002-2016 Karl Waclawek <k...@waclawek.net> Copyright (c) 2005-2009 Steven Solie <ste...@solie.ca> Copyright (c) 2016 Eric Rahm <er...@mozilla.com> - Copyright (c) 2016-2024 Sebastian Pipping <sebast...@pipping.org> + Copyright (c) 2016-2025 Sebastian Pipping <sebast...@pipping.org> Copyright (c) 2016 Gaurav <g.gu...@samsung.com> Copyright (c) 2016 Thomas Beutlich <t...@tbeu.de> Copyright (c) 2016 Gustavo Grieco <gustavo.gri...@imag.fr> @@ -39,7 +39,7 @@ Copyright (c) 2022 Sean McBride <s...@rogue-research.com> Copyright (c) 2023 Owain Davies <owa...@bath.edu> Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <sn...@sony.com> - Copyright (c) 2024 Berkay Eren Ürün <berkay.uer...@siemens.com> + Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.uer...@siemens.com> Copyright (c) 2024 Hanno Böck <ha...@gentoo.org> Licensed under the MIT license: @@ -325,6 +325,10 @@ typedef struct { const XML_Char *publicId; const XML_Char *notation; XML_Bool open; + XML_Bool hasMore; /* true if entity has not been completely processed */ + /* An entity can be open while being already completely processed (hasMore == + XML_FALSE). The reason is the delayed closing of entities until their inner + entities are processed and closed */ XML_Bool is_param; XML_Bool is_internal; /* true if declared in internal subset outside PE */ } ENTITY; @@ -415,6 +419,12 @@ typedef struct { int *scaffIndex; } DTD; +enum EntityType { + ENTITY_INTERNAL, + ENTITY_ATTRIBUTE, + ENTITY_VALUE, +}; + typedef struct open_internal_entity { const char *internalEventPtr; const char *internalEventEndPtr; @@ -422,6 +432,7 @@ typedef struct open_internal_entity { ENTITY *entity; int startTagLevel; XML_Bool betweenDecl; /* WFC: PE Between Declarations */ + enum EntityType type; } OPEN_INTERNAL_ENTITY; enum XML_Account { @@ -481,8 +492,8 @@ static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *next, const char **nextPtr, XML_Bool haveMore, XML_Bool allowClosingDoctype, enum XML_Account account); -static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, - XML_Bool betweenDecl); +static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl, enum EntityType type); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, @@ -513,18 +524,22 @@ static enum XML_Error storeAttributeValue(XML_Parser parser, const char *ptr, const char *end, STRING_POOL *pool, enum XML_Account account); -static enum XML_Error appendAttributeValue(XML_Parser parser, - const ENCODING *enc, - XML_Bool isCdata, const char *ptr, - const char *end, STRING_POOL *pool, - enum XML_Account account); +static enum XML_Error +appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account, const char **nextPtr); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); #if XML_GE == 1 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end, - enum XML_Account account); + enum XML_Account account, + const char **nextPtr); +static enum XML_Error callStoreEntityValue(XML_Parser parser, + const ENCODING *enc, + const char *start, const char *end, + enum XML_Account account); #else static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); #endif @@ -709,6 +724,10 @@ struct XML_ParserStruct { const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; OPEN_INTERNAL_ENTITY *m_freeInternalEntities; + OPEN_INTERNAL_ENTITY *m_openAttributeEntities; + OPEN_INTERNAL_ENTITY *m_freeAttributeEntities; + OPEN_INTERNAL_ENTITY *m_openValueEntities; + OPEN_INTERNAL_ENTITY *m_freeValueEntities; XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; @@ -756,6 +775,7 @@ struct XML_ParserStruct { ACCOUNTING m_accounting; ENTITY_STATS m_entity_stats; #endif + XML_Bool m_reenter; }; #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) @@ -1028,7 +1048,29 @@ callProcessor(XML_Parser parser, const char *start, const char *end, #if defined(XML_TESTING) g_bytesScanned += (unsigned)have_now; #endif - const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); + // Run in a loop to eliminate dangerous recursion depths + enum XML_Error ret; + *endPtr = start; + while (1) { + // Use endPtr as the new start in each iteration, since it will + // be set to the next start point by m_processor. + ret = parser->m_processor(parser, *endPtr, end, endPtr); + + // Make parsing status (and in particular XML_SUSPENDED) take + // precedence over re-enter flag when they disagree + if (parser->m_parsingStatus.parsing != XML_PARSING) { + parser->m_reenter = XML_FALSE; + } + + if (! parser->m_reenter) { + break; + } + + parser->m_reenter = XML_FALSE; + if (ret != XML_ERROR_NONE) + return ret; + } + if (ret == XML_ERROR_NONE) { // if we consumed nothing, remember what we had on this parse attempt. if (*endPtr == start) { @@ -1139,6 +1181,8 @@ parserCreate(const XML_Char *encodingName, parser->m_freeBindingList = NULL; parser->m_freeTagList = NULL; parser->m_freeInternalEntities = NULL; + parser->m_freeAttributeEntities = NULL; + parser->m_freeValueEntities = NULL; parser->m_groupSize = 0; parser->m_groupConnector = NULL; @@ -1241,6 +1285,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; parser->m_openInternalEntities = NULL; + parser->m_openAttributeEntities = NULL; + parser->m_openValueEntities = NULL; parser->m_defaultExpandInternalEntities = XML_TRUE; parser->m_tagLevel = 0; parser->m_tagStack = NULL; @@ -1251,6 +1297,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_unknownEncodingData = NULL; parser->m_parentParser = NULL; parser->m_parsingStatus.parsing = XML_INITIALIZED; + // Reentry can only be triggered inside m_processor calls + parser->m_reenter = XML_FALSE; #ifdef XML_DTD parser->m_isParamEntity = XML_FALSE; parser->m_useForeignDTD = XML_FALSE; @@ -1310,6 +1358,24 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; } + /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but + * for attributes) */ + openEntityList = parser->m_openAttributeEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = parser->m_freeAttributeEntities; + parser->m_freeAttributeEntities = openEntity; + } + /* move m_openValueEntities to m_freeValueEntities (i.e. same task but + * for value entities) */ + openEntityList = parser->m_openValueEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = parser->m_freeValueEntities; + parser->m_freeValueEntities = openEntity; + } moveToFreeBindingList(parser, parser->m_inheritedBindings); FREE(parser, parser->m_unknownEncodingMem); if (parser->m_unknownEncodingRelease) @@ -1323,6 +1389,19 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { return XML_TRUE; } +static XML_Bool +parserBusy(XML_Parser parser) { + switch (parser->m_parsingStatus.parsing) { + case XML_PARSING: + case XML_SUSPENDED: + return XML_TRUE; + case XML_INITIALIZED: + case XML_FINISHED: + default: + return XML_FALSE; + } +} + enum XML_Status XMLCALL XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { if (parser == NULL) @@ -1331,8 +1410,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return XML_STATUS_ERROR; /* Get rid of any previous encoding name */ @@ -1569,7 +1647,34 @@ XML_ParserFree(XML_Parser parser) { entityList = entityList->next; FREE(parser, openEntity); } - + /* free m_openAttributeEntities and m_freeAttributeEntities */ + entityList = parser->m_openAttributeEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (parser->m_freeAttributeEntities == NULL) + break; + entityList = parser->m_freeAttributeEntities; + parser->m_freeAttributeEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(parser, openEntity); + } + /* free m_openValueEntities and m_freeValueEntities */ + entityList = parser->m_openValueEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (parser->m_freeValueEntities == NULL) + break; + entityList = parser->m_freeValueEntities; + parser->m_freeValueEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(parser, openEntity); + } destroyBindings(parser->m_freeBindingList, parser); destroyBindings(parser->m_inheritedBindings, parser); poolDestroy(&parser->m_tempPool); @@ -1611,8 +1716,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { return XML_ERROR_INVALID_ARGUMENT; #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; parser->m_useForeignDTD = useDTD; return XML_ERROR_NONE; @@ -1627,8 +1731,7 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { if (parser == NULL) return; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return; parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } @@ -1897,8 +2000,7 @@ XML_SetParamEntityParsing(XML_Parser parser, if (parser == NULL) return 0; /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return 0; #ifdef XML_DTD parser->m_paramEntityParsing = peParsing; @@ -1915,8 +2017,7 @@ XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { if (parser->m_parentParser) return XML_SetHashSalt(parser->m_parentParser, hash_salt); /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parser->m_parsingStatus.parsing == XML_PARSING - || parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parserBusy(parser)) return 0; parser->m_hash_secret_salt = hash_salt; return 1; @@ -2230,6 +2331,11 @@ XML_GetBuffer(XML_Parser parser, int len) { return parser->m_bufferEnd; } +static void +triggerReenter(XML_Parser parser) { + parser->m_reenter = XML_TRUE; +} + enum XML_Status XMLCALL XML_StopParser(XML_Parser parser, XML_Bool resumable) { if (parser == NULL) @@ -2704,8 +2810,9 @@ static enum XML_Error PTRCALL contentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doContent( - parser, 0, parser->m_encoding, start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); + parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end, + endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; @@ -2793,6 +2900,11 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE + } + /* Fall through */ default: start = next; } @@ -2966,7 +3078,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, reportDefault(parser, enc, s, next); break; } - result = processInternalEntity(parser, entity, XML_FALSE); + result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL); if (result != XML_ERROR_NONE) return result; } else if (parser->m_externalEntityRefHandler) { @@ -3092,7 +3204,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { - if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_SUSPENDED + || (parser->m_parsingStatus.parsing == XML_PARSING + && parser->m_reenter)) parser->m_processor = epilogProcessor; else return epilogProcessor(parser, next, end, nextPtr); @@ -3153,7 +3267,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { - if (parser->m_parsingStatus.parsing == XML_SUSPENDED) + if (parser->m_parsingStatus.parsing == XML_SUSPENDED + || (parser->m_parsingStatus.parsing == XML_PARSING + && parser->m_reenter)) parser->m_processor = epilogProcessor; else return epilogProcessor(parser, next, end, nextPtr); @@ -3293,6 +3409,12 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + *nextPtr = next; + return XML_ERROR_NONE; + } + /* Fall through */ default:; } } @@ -4217,6 +4339,11 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE + } + /* Fall through */ default:; } } @@ -4549,7 +4676,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, } /* found end of entity value - can store it now */ return storeEntityValue(parser, parser->m_encoding, s, end, - XML_ACCOUNT_DIRECT); + XML_ACCOUNT_DIRECT, NULL); } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); @@ -4676,7 +4803,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end, break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); } start = next; } @@ -5119,9 +5246,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, #if XML_GE == 1 // This will store the given replacement text in // parser->m_declEntity->textPtr. - enum XML_Error result - = storeEntityValue(parser, enc, s + enc->minBytesPerChar, - next - enc->minBytesPerChar, XML_ACCOUNT_NONE); + enum XML_Error result = callStoreEntityValue( + parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, + XML_ACCOUNT_NONE); if (parser->m_declEntity) { parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); parser->m_declEntity->textLen @@ -5546,7 +5673,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, enum XML_Error result; XML_Bool betweenDecl = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); - result = processInternalEntity(parser, entity, betweenDecl); + result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL); if (result != XML_ERROR_NONE) return result; handleDefault = XML_FALSE; @@ -5751,6 +5878,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + *nextPtr = next; + return XML_ERROR_NONE; + } + /* Fall through */ default: s = next; tok = XmlPrologTok(enc, s, end, &next); @@ -5825,21 +5958,49 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; + case XML_PARSING: + if (parser->m_reenter) { + return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE + } + /* Fall through */ default:; } } } static enum XML_Error -processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { - const char *textStart, *textEnd; - const char *next; - enum XML_Error result; - OPEN_INTERNAL_ENTITY *openEntity; +processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, + enum EntityType type) { + OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList; + switch (type) { + case ENTITY_INTERNAL: + parser->m_processor = internalEntityProcessor; + openEntityList = &parser->m_openInternalEntities; + freeEntityList = &parser->m_freeInternalEntities; + break; + case ENTITY_ATTRIBUTE: + openEntityList = &parser->m_openAttributeEntities; + freeEntityList = &parser->m_freeAttributeEntities; + break; + case ENTITY_VALUE: + openEntityList = &parser->m_openValueEntities; + freeEntityList = &parser->m_freeValueEntities; + break; + /* default case serves merely as a safety net in case of a + * wrong entityType. Therefore we exclude the following lines + * from the test coverage. + * + * LCOV_EXCL_START + */ + default: + // Should not reach here + assert(0); + /* LCOV_EXCL_STOP */ + } - if (parser->m_freeInternalEntities) { - openEntity = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity->next; + if (*freeEntityList) { + openEntity = *freeEntityList; + *freeEntityList = openEntity->next; } else { openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); @@ -5847,55 +6008,34 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; + entity->hasMore = XML_TRUE; #if XML_GE == 1 entityTrackingOnOpen(parser, entity, __LINE__); #endif entity->processed = 0; - openEntity->next = parser->m_openInternalEntities; - parser->m_openInternalEntities = openEntity; + openEntity->next = *openEntityList; + *openEntityList = openEntity; openEntity->entity = entity; + openEntity->type = type; openEntity->startTagLevel = parser->m_tagLevel; openEntity->betweenDecl = betweenDecl; openEntity->internalEventPtr = NULL; openEntity->internalEventEndPtr = NULL; - textStart = (const char *)entity->textPtr; - textEnd = (const char *)(entity->textPtr + entity->textLen); - /* Set a safe default value in case 'next' does not get set */ - next = textStart; - - if (entity->is_param) { - int tok - = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_FALSE, - XML_ACCOUNT_ENTITY_EXPANSION); - } else { - result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, - textStart, textEnd, &next, XML_FALSE, - XML_ACCOUNT_ENTITY_EXPANSION); - } - if (result == XML_ERROR_NONE) { - if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { - entity->processed = (int)(next - textStart); - parser->m_processor = internalEntityProcessor; - } else if (parser->m_openInternalEntities->entity == entity) { -#if XML_GE == 1 - entityTrackingOnClose(parser, entity, __LINE__); -#endif /* XML_GE == 1 */ - entity->open = XML_FALSE; - parser->m_openInternalEntities = openEntity->next; - /* put openEntity back in list of free instances */ - openEntity->next = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity; - } + // Only internal entities make use of the reenter flag + // therefore no need to set it for other entity types + if (type == ENTITY_INTERNAL) { + triggerReenter(parser); } - return result; + return XML_ERROR_NONE; } static enum XML_Error PTRCALL internalEntityProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { + UNUSED_P(s); + UNUSED_P(end); + UNUSED_P(nextPtr); ENTITY *entity; const char *textStart, *textEnd; const char *next; @@ -5905,68 +6045,67 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, return XML_ERROR_UNEXPECTED_STATE; entity = openEntity->entity; - textStart = ((const char *)entity->textPtr) + entity->processed; - textEnd = (const char *)(entity->textPtr + entity->textLen); - /* Set a safe default value in case 'next' does not get set */ - next = textStart; - - if (entity->is_param) { - int tok - = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_TRUE, - XML_ACCOUNT_ENTITY_EXPANSION); - } else { - result = doContent(parser, openEntity->startTagLevel, - parser->m_internalEncoding, textStart, textEnd, &next, - XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); - } - if (result != XML_ERROR_NONE) - return result; + // This will return early + if (entity->hasMore) { + textStart = ((const char *)entity->textPtr) + entity->processed; + textEnd = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; + + if (entity->is_param) { + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); + } else { + result = doContent(parser, openEntity->startTagLevel, + parser->m_internalEncoding, textStart, textEnd, &next, + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); + } + + if (result != XML_ERROR_NONE) + return result; + // Check if entity is complete, if not, mark down how much of it is + // processed + if (textEnd != next + && (parser->m_parsingStatus.parsing == XML_SUSPENDED + || (parser->m_parsingStatus.parsing == XML_PARSING + && parser->m_reenter))) { + entity->processed = (int)(next - (const char *)entity->textPtr); + return result; + } - if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { - entity->processed = (int)(next - (const char *)entity->textPtr); + // Entity is complete. We cannot close it here since we need to first + // process its possible inner entities (which are added to the + // m_openInternalEntities during doProlog or doContent calls above) + entity->hasMore = XML_FALSE; + triggerReenter(parser); return result; - } + } // End of entity processing, "if" block will return here + // Remove fully processed openEntity from open entity list. #if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); #endif + // openEntity is m_openInternalEntities' head, as we set it at the start of + // this function and we skipped doProlog and doContent calls with hasMore set + // to false. This means we can directly remove the head of + // m_openInternalEntities + assert(parser->m_openInternalEntities == openEntity); entity->open = XML_FALSE; - parser->m_openInternalEntities = openEntity->next; + parser->m_openInternalEntities = parser->m_openInternalEntities->next; + /* put openEntity back in list of free instances */ openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; - // If there are more open entities we want to stop right here and have the - // upcoming call to XML_ResumeParser continue with entity content, or it would - // be ignored altogether. - if (parser->m_openInternalEntities != NULL - && parser->m_parsingStatus.parsing == XML_SUSPENDED) { - return XML_ERROR_NONE; - } - - if (entity->is_param) { - int tok; - parser->m_processor = prologProcessor; - tok = XmlPrologTok(parser->m_encoding, s, end, &next); - return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, - XML_ACCOUNT_DIRECT); - } else { - parser->m_processor = contentProcessor; - /* see externalEntityContentProcessor vs contentProcessor */ - result = doContent(parser, parser->m_parentParser ? 1 : 0, - parser->m_encoding, s, end, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, - XML_ACCOUNT_DIRECT); - if (result == XML_ERROR_NONE) { - if (! storeRawNames(parser)) - return XML_ERROR_NO_MEMORY; - } - return result; + if (parser->m_openInternalEntities == NULL) { + parser->m_processor = entity->is_param ? prologProcessor : contentProcessor; } + triggerReenter(parser); + return XML_ERROR_NONE; } static enum XML_Error PTRCALL @@ -5982,8 +6121,70 @@ static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *ptr, const char *end, STRING_POOL *pool, enum XML_Account account) { - enum XML_Error result - = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); + const char *next = ptr; + enum XML_Error result = XML_ERROR_NONE; + + while (1) { + if (! parser->m_openAttributeEntities) { + result = appendAttributeValue(parser, enc, isCdata, next, end, pool, + account, &next); + } else { + OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities; + if (! openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + ENTITY *const entity = openEntity->entity; + const char *const textStart + = ((const char *)entity->textPtr) + entity->processed; + const char *const textEnd + = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + const char *nextInEntity = textStart; + if (entity->hasMore) { + result = appendAttributeValue( + parser, parser->m_internalEncoding, isCdata, textStart, textEnd, + pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity); + if (result != XML_ERROR_NONE) + break; + // Check if entity is complete, if not, mark down how much of it is + // processed. A XML_SUSPENDED check here is not required as + // appendAttributeValue will never suspend the parser. + if (textEnd != nextInEntity) { + entity->processed + = (int)(nextInEntity - (const char *)entity->textPtr); + continue; + } + + // Entity is complete. We cannot close it here since we need to first + // process its possible inner entities (which are added to the + // m_openAttributeEntities during appendAttributeValue) + entity->hasMore = XML_FALSE; + continue; + } // End of entity processing, "if" block skips the rest + + // Remove fully processed openEntity from open entity list. +#if XML_GE == 1 + entityTrackingOnClose(parser, entity, __LINE__); +#endif + // openEntity is m_openAttributeEntities' head, since we set it at the + // start of this function and because we skipped appendAttributeValue call + // with hasMore set to false. This means we can directly remove the head + // of m_openAttributeEntities + assert(parser->m_openAttributeEntities == openEntity); + entity->open = XML_FALSE; + parser->m_openAttributeEntities = parser->m_openAttributeEntities->next; + + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeAttributeEntities; + parser->m_freeAttributeEntities = openEntity; + } + + // Break if an error occurred or there is nothing left to process + if (result || (parser->m_openAttributeEntities == NULL && end == next)) { + break; + } + } + if (result) return result; if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) @@ -5996,7 +6197,7 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *ptr, const char *end, STRING_POOL *pool, - enum XML_Account account) { + enum XML_Account account, const char **nextPtr) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ #ifndef XML_DTD UNUSED_P(account); @@ -6014,6 +6215,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, #endif switch (tok) { case XML_TOK_NONE: + if (nextPtr) { + *nextPtr = next; + } return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == parser->m_encoding) @@ -6154,21 +6358,11 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = XML_TRUE; -#if XML_GE == 1 - entityTrackingOnOpen(parser, entity, __LINE__); -#endif - result = appendAttributeValue(parser, parser->m_internalEncoding, - isCdata, (const char *)entity->textPtr, - (const char *)textEnd, pool, - XML_ACCOUNT_ENTITY_EXPANSION); -#if XML_GE == 1 - entityTrackingOnClose(parser, entity, __LINE__); -#endif - entity->open = XML_FALSE; - if (result) - return result; + result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE); + if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) { + *nextPtr = next; + } + return result; } } break; default: @@ -6197,7 +6391,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd, - enum XML_Account account) { + enum XML_Account account, const char **nextPtr) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; @@ -6215,8 +6409,9 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, return XML_ERROR_NO_MEMORY; } + const char *next; for (;;) { - const char *next + next = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); @@ -6278,16 +6473,8 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, } else dtd->keepProcessing = dtd->standalone; } else { - entity->open = XML_TRUE; - entityTrackingOnOpen(parser, entity, __LINE__); - result = storeEntityValue( - parser, parser->m_internalEncoding, (const char *)entity->textPtr, - (const char *)(entity->textPtr + entity->textLen), - XML_ACCOUNT_ENTITY_EXPANSION); - entityTrackingOnClose(parser, entity, __LINE__); - entity->open = XML_FALSE; - if (result) - goto endEntityValue; + result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE); + goto endEntityValue; } break; } @@ -6375,6 +6562,81 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, # ifdef XML_DTD parser->m_prologState.inEntityValue = oldInEntityValue; # endif /* XML_DTD */ + // If 'nextPtr' is given, it should be updated during the processing + if (nextPtr != NULL) { + *nextPtr = next; + } + return result; +} + +static enum XML_Error +callStoreEntityValue(XML_Parser parser, const ENCODING *enc, + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { + const char *next = entityTextPtr; + enum XML_Error result = XML_ERROR_NONE; + while (1) { + if (! parser->m_openValueEntities) { + result + = storeEntityValue(parser, enc, next, entityTextEnd, account, &next); + } else { + OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities; + if (! openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + ENTITY *const entity = openEntity->entity; + const char *const textStart + = ((const char *)entity->textPtr) + entity->processed; + const char *const textEnd + = (const char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + const char *nextInEntity = textStart; + if (entity->hasMore) { + result = storeEntityValue(parser, parser->m_internalEncoding, textStart, + textEnd, XML_ACCOUNT_ENTITY_EXPANSION, + &nextInEntity); + if (result != XML_ERROR_NONE) + break; + // Check if entity is complete, if not, mark down how much of it is + // processed. A XML_SUSPENDED check here is not required as + // appendAttributeValue will never suspend the parser. + if (textEnd != nextInEntity) { + entity->processed + = (int)(nextInEntity - (const char *)entity->textPtr); + continue; + } + + // Entity is complete. We cannot close it here since we need to first + // process its possible inner entities (which are added to the + // m_openValueEntities during storeEntityValue) + entity->hasMore = XML_FALSE; + continue; + } // End of entity processing, "if" block skips the rest + + // Remove fully processed openEntity from open entity list. +# if XML_GE == 1 + entityTrackingOnClose(parser, entity, __LINE__); +# endif + // openEntity is m_openValueEntities' head, since we set it at the + // start of this function and because we skipped storeEntityValue call + // with hasMore set to false. This means we can directly remove the head + // of m_openValueEntities + assert(parser->m_openValueEntities == openEntity); + entity->open = XML_FALSE; + parser->m_openValueEntities = parser->m_openValueEntities->next; + + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeValueEntities; + parser->m_freeValueEntities = openEntity; + } + + // Break if an error occurred or there is nothing left to process + if (result + || (parser->m_openValueEntities == NULL && entityTextEnd == next)) { + break; + } + } + return result; } @@ -8542,11 +8804,13 @@ unsignedCharToPrintable(unsigned char c) { return "\\xFE"; case 255: return "\\xFF"; + // LCOV_EXCL_START default: assert(0); /* never gets here */ return "dead code"; } assert(0); /* never gets here */ + // LCOV_EXCL_STOP } #endif /* XML_GE == 1 */ diff --git a/Tools/build/.warningignore_macos b/Tools/build/.warningignore_macos index e72309229cc60f..d7b62bc6a43ba6 100644 --- a/Tools/build/.warningignore_macos +++ b/Tools/build/.warningignore_macos @@ -4,6 +4,6 @@ # Format example: # /path/to/file (number of warnings in file) Modules/expat/siphash.h 7 -Modules/expat/xmlparse.c 8 +Modules/expat/xmlparse.c 13 Modules/expat/xmltok.c 3 Modules/expat/xmltok_impl.c 26 _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com