tng 2003/03/20 12:08:55 Modified: c/src/xercesc/util XMLExceptMsgs.hpp XMLUTF8Transcoder.cpp Log: [Bug 12436] Add detection of invalid UTF-8 byte sequences. Patch from Neil Graham. Revision Changes Path 1.14 +265 -264 xml-xerces/c/src/xercesc/util/XMLExceptMsgs.hpp Index: XMLExceptMsgs.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLExceptMsgs.hpp,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- XMLExceptMsgs.hpp 18 Mar 2003 19:39:44 -0000 1.13 +++ XMLExceptMsgs.hpp 20 Mar 2003 20:08:54 -0000 1.14 @@ -120,270 +120,271 @@ , URL_RelativeBaseURL = 105 , URL_BaseUnderflow = 106 , URL_BadPortField = 107 - , Vector_BadIndex = 108 - , Val_InvalidElemId = 109 - , Val_CantHaveIntSS = 110 - , XMLRec_UnknownEncoding = 111 - , Parser_Parse1 = 112 - , Parser_Parse2 = 113 - , Parser_Next1 = 114 - , Parser_Next2 = 115 - , Parser_Next3 = 116 - , Parser_Next4 = 117 - , Parser_Factor1 = 118 - , Parser_Factor2 = 119 - , Parser_Factor3 = 120 - , Parser_Factor4 = 121 - , Parser_Factor5 = 122 - , Parser_Factor6 = 123 - , Parser_Atom1 = 124 - , Parser_Atom2 = 125 - , Parser_Atom3 = 126 - , Parser_Atom4 = 127 - , Parser_Atom5 = 128 - , Parser_CC1 = 129 - , Parser_CC2 = 130 - , Parser_CC3 = 131 - , Parser_CC4 = 132 - , Parser_CC5 = 133 - , Parser_CC6 = 134 - , Parser_Ope1 = 135 - , Parser_Ope2 = 136 - , Parser_Ope3 = 137 - , Parser_Descape1 = 138 - , Parser_Descape2 = 139 - , Parser_Descape3 = 140 - , Parser_Descape4 = 141 - , Parser_Descape5 = 142 - , Parser_Process1 = 143 - , Parser_Process2 = 144 - , Parser_Quantifier1 = 145 - , Parser_Quantifier2 = 146 - , Parser_Quantifier3 = 147 - , Parser_Quantifier4 = 148 - , Parser_Quantifier5 = 149 - , Gen_NoSchemaValidator = 150 - , XUTIL_UnCopyableNodeType = 151 - , SubGrpComparator_NGR = 152 - , FACET_Invalid_Len = 153 - , FACET_Invalid_maxLen = 154 - , FACET_Invalid_minLen = 155 - , FACET_NonNeg_Len = 156 - , FACET_NonNeg_maxLen = 157 - , FACET_NonNeg_minLen = 158 - , FACET_Len_maxLen = 159 - , FACET_Len_minLen = 160 - , FACET_maxLen_minLen = 161 - , FACET_bool_Pattern = 162 - , FACET_Invalid_Tag = 163 - , FACET_Len_baseLen = 164 - , FACET_minLen_baseminLen = 165 - , FACET_minLen_basemaxLen = 166 - , FACET_maxLen_basemaxLen = 167 - , FACET_maxLen_baseminLen = 168 - , FACET_Len_baseMinLen = 169 - , FACET_Len_baseMaxLen = 170 - , FACET_minLen_baseLen = 171 - , FACET_maxLen_baseLen = 172 - , FACET_enum_base = 173 - , FACET_Invalid_WS = 174 - , FACET_WS_collapse = 175 - , FACET_WS_replace = 176 - , FACET_Invalid_MaxIncl = 177 - , FACET_Invalid_MaxExcl = 178 - , FACET_Invalid_MinIncl = 179 - , FACET_Invalid_MinExcl = 180 - , FACET_Invalid_TotalDigit = 181 - , FACET_Invalid_FractDigit = 182 - , FACET_PosInt_TotalDigit = 183 - , FACET_NonNeg_FractDigit = 184 - , FACET_max_Incl_Excl = 185 - , FACET_min_Incl_Excl = 186 - , FACET_maxExcl_minExcl = 187 - , FACET_maxExcl_minIncl = 188 - , FACET_maxIncl_minExcl = 189 - , FACET_maxIncl_minIncl = 190 - , FACET_TotDigit_FractDigit = 191 - , FACET_maxIncl_base_maxExcl = 192 - , FACET_maxIncl_base_maxIncl = 193 - , FACET_maxIncl_base_minIncl = 194 - , FACET_maxIncl_base_minExcl = 195 - , FACET_maxExcl_base_maxExcl = 196 - , FACET_maxExcl_base_maxIncl = 197 - , FACET_maxExcl_base_minIncl = 198 - , FACET_maxExcl_base_minExcl = 199 - , FACET_minExcl_base_maxExcl = 200 - , FACET_minExcl_base_maxIncl = 201 - , FACET_minExcl_base_minIncl = 202 - , FACET_minExcl_base_minExcl = 203 - , FACET_minIncl_base_maxExcl = 204 - , FACET_minIncl_base_maxIncl = 205 - , FACET_minIncl_base_minIncl = 206 - , FACET_minIncl_base_minExcl = 207 - , FACET_maxIncl_notFromBase = 208 - , FACET_maxExcl_notFromBase = 209 - , FACET_minIncl_notFromBase = 210 - , FACET_minExcl_notFromBase = 211 - , FACET_totalDigit_base_totalDigit = 212 - , FACET_fractDigit_base_totalDigit = 213 - , FACET_fractDigit_base_fractDigit = 214 - , FACET_maxIncl_base_fixed = 215 - , FACET_maxExcl_base_fixed = 216 - , FACET_minIncl_base_fixed = 217 - , FACET_minExcl_base_fixed = 218 - , FACET_totalDigit_base_fixed = 219 - , FACET_fractDigit_base_fixed = 220 - , FACET_maxLen_base_fixed = 221 - , FACET_minLen_base_fixed = 222 - , FACET_len_base_fixed = 223 - , FACET_whitespace_base_fixed = 224 - , FACET_internalError_fixed = 225 - , FACET_List_Null_baseValidator = 226 - , FACET_Union_Null_memberTypeValidators = 227 - , FACET_Union_Null_baseValidator = 228 - , FACET_Union_invalid_baseValidatorType = 229 - , VALUE_NotMatch_Pattern = 230 - , VALUE_Not_Base64 = 231 - , VALUE_Not_HexBin = 232 - , VALUE_GT_maxLen = 233 - , VALUE_LT_minLen = 234 - , VALUE_NE_Len = 235 - , VALUE_NotIn_Enumeration = 236 - , VALUE_exceed_totalDigit = 237 - , VALUE_exceed_fractDigit = 238 - , VALUE_exceed_maxIncl = 239 - , VALUE_exceed_maxExcl = 240 - , VALUE_exceed_minIncl = 241 - , VALUE_exceed_minExcl = 242 - , VALUE_WS_replaced = 243 - , VALUE_WS_collapsed = 244 - , VALUE_Invalid_NCName = 245 - , VALUE_Invalid_Name = 246 - , VALUE_ID_Not_Unique = 247 - , VALUE_ENTITY_Invalid = 248 - , VALUE_QName_Invalid = 249 - , VALUE_NOTATION_Invalid = 250 - , VALUE_no_match_memberType = 251 - , VALUE_URI_Malformed = 252 - , XMLNUM_emptyString = 253 - , XMLNUM_WSString = 254 - , XMLNUM_2ManyDecPoint = 255 - , XMLNUM_Inv_chars = 256 - , XMLNUM_null_ptr = 257 - , XMLNUM_URI_Component_Empty = 258 - , XMLNUM_URI_Component_for_GenURI_Only = 259 - , XMLNUM_URI_Component_Invalid_EscapeSequence = 260 - , XMLNUM_URI_Component_Invalid_Char = 261 - , XMLNUM_URI_Component_Set_Null = 262 - , XMLNUM_URI_Component_Not_Conformant = 263 - , XMLNUM_URI_No_Scheme = 264 - , XMLNUM_URI_NullHost = 265 - , XMLNUM_URI_NullPath = 266 - , XMLNUM_URI_Component_inPath = 267 - , XMLNUM_URI_PortNo_Invalid = 268 - , XMLNUM_DBL_FLT_maxNeg = 269 - , XMLNUM_DBL_FLT_maxPos = 270 - , XMLNUM_DBL_FLT_minNegPos = 271 - , XMLNUM_DBL_FLT_InvalidType = 272 - , XMLNUM_DBL_FLT_No_Exponent = 273 - , Regex_Result_Not_Set = 274 - , Regex_CompactRangesError = 275 - , Regex_MergeRangesTypeMismatch = 276 - , Regex_SubtractRangesError = 277 - , Regex_IntersectRangesError = 278 - , Regex_ComplementRangesInvalidArg = 279 - , Regex_InvalidCategoryName = 280 - , Regex_KeywordNotFound = 281 - , Regex_BadRefNo = 282 - , Regex_UnknownOption = 283 - , Regex_UnknownTokenType = 284 - , Regex_RangeTokenGetError = 285 - , Regex_NotSupported = 286 - , Regex_InvalidChildIndex = 287 - , Regex_RepPatMatchesZeroString = 288 - , Regex_InvalidRepPattern = 289 - , NEL_RepeatedCalls = 290 - , RethrowError = 291 - , Out_Of_Memory = 292 - , DV_InvalidOperation = 293 - , XPath_NoAttrSelector = 294 - , XPath_NoUnionAtStart = 295 - , XPath_NoMultipleUnion = 296 - , XPath_MissingAttr = 297 - , XPath_ExpectedToken1 = 298 - , XPath_PrefixNoURI = 299 - , XPath_NoDoubleColon = 300 - , XPath_ExpectedStep1 = 301 - , XPath_ExpectedStep2 = 302 - , XPath_ExpectedStep3 = 303 - , XPath_NoForwardSlash = 304 - , XPath_NoDoubleForwardSlash = 305 - , XPath_NoForwardSlashAtStart = 306 - , XPath_NoSelectionOfRoot = 307 - , XPath_EmptyExpr = 308 - , XPath_NoUnionAtEnd = 309 - , XPath_InvalidChar = 310 - , XPath_TokenNotSupported = 311 - , XPath_FindSolution = 312 - , DateTime_Assert_Buffer_Fail = 313 - , DateTime_dt_missingT = 314 - , DateTime_gDay_invalid = 315 - , DateTime_gMth_invalid = 316 - , DateTime_gMthDay_invalid = 317 - , DateTime_dur_Start_dashP = 318 - , DateTime_dur_noP = 319 - , DateTime_dur_DashNotFirst = 320 - , DateTime_dur_inv_b4T = 321 - , DateTime_dur_NoTimeAfterT = 322 - , DateTime_dur_NoElementAtAll = 323 - , DateTime_dur_inv_seconds = 324 - , DateTime_date_incomplete = 325 - , DateTime_date_invalid = 326 - , DateTime_time_incomplete = 327 - , DateTime_time_invalid = 328 - , DateTime_ms_noDigit = 329 - , DateTime_ym_incomplete = 330 - , DateTime_ym_invalid = 331 - , DateTime_year_tooShort = 332 - , DateTime_year_leadingZero = 333 - , DateTime_ym_noMonth = 334 - , DateTime_tz_noUTCsign = 335 - , DateTime_tz_stuffAfterZ = 336 - , DateTime_tz_invalid = 337 - , DateTime_year_zero = 338 - , DateTime_mth_invalid = 339 - , DateTime_day_invalid = 340 - , DateTime_hour_invalid = 341 - , DateTime_min_invalid = 342 - , DateTime_second_invalid = 343 - , DateTime_tz_hh_invalid = 344 - , PD_EmptyBase = 345 - , PD_NSCompat1 = 346 - , PD_OccurRangeE = 347 - , PD_NameTypeOK1 = 348 - , PD_NameTypeOK2 = 349 - , PD_NameTypeOK3 = 350 - , PD_NameTypeOK4 = 351 - , PD_NameTypeOK5 = 352 - , PD_NameTypeOK6 = 353 - , PD_NameTypeOK7 = 354 - , PD_RecurseAsIfGroup = 355 - , PD_Recurse1 = 356 - , PD_Recurse2 = 357 - , PD_ForbiddenRes1 = 358 - , PD_ForbiddenRes2 = 359 - , PD_ForbiddenRes3 = 360 - , PD_ForbiddenRes4 = 361 - , PD_NSSubset1 = 362 - , PD_NSSubset2 = 363 - , PD_NSRecurseCheckCardinality1 = 364 - , PD_RecurseUnordered = 365 - , PD_MapAndSum = 366 - , PD_InvalidContentType = 367 - , NodeIDMap_GrowErr = 368 - , F_HighBounds = 369 - , E_LowBounds = 370 - , E_HighBounds = 371 + , UTF8_FormatError = 108 + , Vector_BadIndex = 109 + , Val_InvalidElemId = 110 + , Val_CantHaveIntSS = 111 + , XMLRec_UnknownEncoding = 112 + , Parser_Parse1 = 113 + , Parser_Parse2 = 114 + , Parser_Next1 = 115 + , Parser_Next2 = 116 + , Parser_Next3 = 117 + , Parser_Next4 = 118 + , Parser_Factor1 = 119 + , Parser_Factor2 = 120 + , Parser_Factor3 = 121 + , Parser_Factor4 = 122 + , Parser_Factor5 = 123 + , Parser_Factor6 = 124 + , Parser_Atom1 = 125 + , Parser_Atom2 = 126 + , Parser_Atom3 = 127 + , Parser_Atom4 = 128 + , Parser_Atom5 = 129 + , Parser_CC1 = 130 + , Parser_CC2 = 131 + , Parser_CC3 = 132 + , Parser_CC4 = 133 + , Parser_CC5 = 134 + , Parser_CC6 = 135 + , Parser_Ope1 = 136 + , Parser_Ope2 = 137 + , Parser_Ope3 = 138 + , Parser_Descape1 = 139 + , Parser_Descape2 = 140 + , Parser_Descape3 = 141 + , Parser_Descape4 = 142 + , Parser_Descape5 = 143 + , Parser_Process1 = 144 + , Parser_Process2 = 145 + , Parser_Quantifier1 = 146 + , Parser_Quantifier2 = 147 + , Parser_Quantifier3 = 148 + , Parser_Quantifier4 = 149 + , Parser_Quantifier5 = 150 + , Gen_NoSchemaValidator = 151 + , XUTIL_UnCopyableNodeType = 152 + , SubGrpComparator_NGR = 153 + , FACET_Invalid_Len = 154 + , FACET_Invalid_maxLen = 155 + , FACET_Invalid_minLen = 156 + , FACET_NonNeg_Len = 157 + , FACET_NonNeg_maxLen = 158 + , FACET_NonNeg_minLen = 159 + , FACET_Len_maxLen = 160 + , FACET_Len_minLen = 161 + , FACET_maxLen_minLen = 162 + , FACET_bool_Pattern = 163 + , FACET_Invalid_Tag = 164 + , FACET_Len_baseLen = 165 + , FACET_minLen_baseminLen = 166 + , FACET_minLen_basemaxLen = 167 + , FACET_maxLen_basemaxLen = 168 + , FACET_maxLen_baseminLen = 169 + , FACET_Len_baseMinLen = 170 + , FACET_Len_baseMaxLen = 171 + , FACET_minLen_baseLen = 172 + , FACET_maxLen_baseLen = 173 + , FACET_enum_base = 174 + , FACET_Invalid_WS = 175 + , FACET_WS_collapse = 176 + , FACET_WS_replace = 177 + , FACET_Invalid_MaxIncl = 178 + , FACET_Invalid_MaxExcl = 179 + , FACET_Invalid_MinIncl = 180 + , FACET_Invalid_MinExcl = 181 + , FACET_Invalid_TotalDigit = 182 + , FACET_Invalid_FractDigit = 183 + , FACET_PosInt_TotalDigit = 184 + , FACET_NonNeg_FractDigit = 185 + , FACET_max_Incl_Excl = 186 + , FACET_min_Incl_Excl = 187 + , FACET_maxExcl_minExcl = 188 + , FACET_maxExcl_minIncl = 189 + , FACET_maxIncl_minExcl = 190 + , FACET_maxIncl_minIncl = 191 + , FACET_TotDigit_FractDigit = 192 + , FACET_maxIncl_base_maxExcl = 193 + , FACET_maxIncl_base_maxIncl = 194 + , FACET_maxIncl_base_minIncl = 195 + , FACET_maxIncl_base_minExcl = 196 + , FACET_maxExcl_base_maxExcl = 197 + , FACET_maxExcl_base_maxIncl = 198 + , FACET_maxExcl_base_minIncl = 199 + , FACET_maxExcl_base_minExcl = 200 + , FACET_minExcl_base_maxExcl = 201 + , FACET_minExcl_base_maxIncl = 202 + , FACET_minExcl_base_minIncl = 203 + , FACET_minExcl_base_minExcl = 204 + , FACET_minIncl_base_maxExcl = 205 + , FACET_minIncl_base_maxIncl = 206 + , FACET_minIncl_base_minIncl = 207 + , FACET_minIncl_base_minExcl = 208 + , FACET_maxIncl_notFromBase = 209 + , FACET_maxExcl_notFromBase = 210 + , FACET_minIncl_notFromBase = 211 + , FACET_minExcl_notFromBase = 212 + , FACET_totalDigit_base_totalDigit = 213 + , FACET_fractDigit_base_totalDigit = 214 + , FACET_fractDigit_base_fractDigit = 215 + , FACET_maxIncl_base_fixed = 216 + , FACET_maxExcl_base_fixed = 217 + , FACET_minIncl_base_fixed = 218 + , FACET_minExcl_base_fixed = 219 + , FACET_totalDigit_base_fixed = 220 + , FACET_fractDigit_base_fixed = 221 + , FACET_maxLen_base_fixed = 222 + , FACET_minLen_base_fixed = 223 + , FACET_len_base_fixed = 224 + , FACET_whitespace_base_fixed = 225 + , FACET_internalError_fixed = 226 + , FACET_List_Null_baseValidator = 227 + , FACET_Union_Null_memberTypeValidators = 228 + , FACET_Union_Null_baseValidator = 229 + , FACET_Union_invalid_baseValidatorType = 230 + , VALUE_NotMatch_Pattern = 231 + , VALUE_Not_Base64 = 232 + , VALUE_Not_HexBin = 233 + , VALUE_GT_maxLen = 234 + , VALUE_LT_minLen = 235 + , VALUE_NE_Len = 236 + , VALUE_NotIn_Enumeration = 237 + , VALUE_exceed_totalDigit = 238 + , VALUE_exceed_fractDigit = 239 + , VALUE_exceed_maxIncl = 240 + , VALUE_exceed_maxExcl = 241 + , VALUE_exceed_minIncl = 242 + , VALUE_exceed_minExcl = 243 + , VALUE_WS_replaced = 244 + , VALUE_WS_collapsed = 245 + , VALUE_Invalid_NCName = 246 + , VALUE_Invalid_Name = 247 + , VALUE_ID_Not_Unique = 248 + , VALUE_ENTITY_Invalid = 249 + , VALUE_QName_Invalid = 250 + , VALUE_NOTATION_Invalid = 251 + , VALUE_no_match_memberType = 252 + , VALUE_URI_Malformed = 253 + , XMLNUM_emptyString = 254 + , XMLNUM_WSString = 255 + , XMLNUM_2ManyDecPoint = 256 + , XMLNUM_Inv_chars = 257 + , XMLNUM_null_ptr = 258 + , XMLNUM_URI_Component_Empty = 259 + , XMLNUM_URI_Component_for_GenURI_Only = 260 + , XMLNUM_URI_Component_Invalid_EscapeSequence = 261 + , XMLNUM_URI_Component_Invalid_Char = 262 + , XMLNUM_URI_Component_Set_Null = 263 + , XMLNUM_URI_Component_Not_Conformant = 264 + , XMLNUM_URI_No_Scheme = 265 + , XMLNUM_URI_NullHost = 266 + , XMLNUM_URI_NullPath = 267 + , XMLNUM_URI_Component_inPath = 268 + , XMLNUM_URI_PortNo_Invalid = 269 + , XMLNUM_DBL_FLT_maxNeg = 270 + , XMLNUM_DBL_FLT_maxPos = 271 + , XMLNUM_DBL_FLT_minNegPos = 272 + , XMLNUM_DBL_FLT_InvalidType = 273 + , XMLNUM_DBL_FLT_No_Exponent = 274 + , Regex_Result_Not_Set = 275 + , Regex_CompactRangesError = 276 + , Regex_MergeRangesTypeMismatch = 277 + , Regex_SubtractRangesError = 278 + , Regex_IntersectRangesError = 279 + , Regex_ComplementRangesInvalidArg = 280 + , Regex_InvalidCategoryName = 281 + , Regex_KeywordNotFound = 282 + , Regex_BadRefNo = 283 + , Regex_UnknownOption = 284 + , Regex_UnknownTokenType = 285 + , Regex_RangeTokenGetError = 286 + , Regex_NotSupported = 287 + , Regex_InvalidChildIndex = 288 + , Regex_RepPatMatchesZeroString = 289 + , Regex_InvalidRepPattern = 290 + , NEL_RepeatedCalls = 291 + , RethrowError = 292 + , Out_Of_Memory = 293 + , DV_InvalidOperation = 294 + , XPath_NoAttrSelector = 295 + , XPath_NoUnionAtStart = 296 + , XPath_NoMultipleUnion = 297 + , XPath_MissingAttr = 298 + , XPath_ExpectedToken1 = 299 + , XPath_PrefixNoURI = 300 + , XPath_NoDoubleColon = 301 + , XPath_ExpectedStep1 = 302 + , XPath_ExpectedStep2 = 303 + , XPath_ExpectedStep3 = 304 + , XPath_NoForwardSlash = 305 + , XPath_NoDoubleForwardSlash = 306 + , XPath_NoForwardSlashAtStart = 307 + , XPath_NoSelectionOfRoot = 308 + , XPath_EmptyExpr = 309 + , XPath_NoUnionAtEnd = 310 + , XPath_InvalidChar = 311 + , XPath_TokenNotSupported = 312 + , XPath_FindSolution = 313 + , DateTime_Assert_Buffer_Fail = 314 + , DateTime_dt_missingT = 315 + , DateTime_gDay_invalid = 316 + , DateTime_gMth_invalid = 317 + , DateTime_gMthDay_invalid = 318 + , DateTime_dur_Start_dashP = 319 + , DateTime_dur_noP = 320 + , DateTime_dur_DashNotFirst = 321 + , DateTime_dur_inv_b4T = 322 + , DateTime_dur_NoTimeAfterT = 323 + , DateTime_dur_NoElementAtAll = 324 + , DateTime_dur_inv_seconds = 325 + , DateTime_date_incomplete = 326 + , DateTime_date_invalid = 327 + , DateTime_time_incomplete = 328 + , DateTime_time_invalid = 329 + , DateTime_ms_noDigit = 330 + , DateTime_ym_incomplete = 331 + , DateTime_ym_invalid = 332 + , DateTime_year_tooShort = 333 + , DateTime_year_leadingZero = 334 + , DateTime_ym_noMonth = 335 + , DateTime_tz_noUTCsign = 336 + , DateTime_tz_stuffAfterZ = 337 + , DateTime_tz_invalid = 338 + , DateTime_year_zero = 339 + , DateTime_mth_invalid = 340 + , DateTime_day_invalid = 341 + , DateTime_hour_invalid = 342 + , DateTime_min_invalid = 343 + , DateTime_second_invalid = 344 + , DateTime_tz_hh_invalid = 345 + , PD_EmptyBase = 346 + , PD_NSCompat1 = 347 + , PD_OccurRangeE = 348 + , PD_NameTypeOK1 = 349 + , PD_NameTypeOK2 = 350 + , PD_NameTypeOK3 = 351 + , PD_NameTypeOK4 = 352 + , PD_NameTypeOK5 = 353 + , PD_NameTypeOK6 = 354 + , PD_NameTypeOK7 = 355 + , PD_RecurseAsIfGroup = 356 + , PD_Recurse1 = 357 + , PD_Recurse2 = 358 + , PD_ForbiddenRes1 = 359 + , PD_ForbiddenRes2 = 360 + , PD_ForbiddenRes3 = 361 + , PD_ForbiddenRes4 = 362 + , PD_NSSubset1 = 363 + , PD_NSSubset2 = 364 + , PD_NSRecurseCheckCardinality1 = 365 + , PD_RecurseUnordered = 366 + , PD_MapAndSum = 367 + , PD_InvalidContentType = 368 + , NodeIDMap_GrowErr = 369 + , F_HighBounds = 370 + , E_LowBounds = 371 + , E_HighBounds = 372 }; }; 1.4 +60 -13 xml-xerces/c/src/xercesc/util/XMLUTF8Transcoder.cpp Index: XMLUTF8Transcoder.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUTF8Transcoder.cpp,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- XMLUTF8Transcoder.cpp 28 Nov 2002 17:39:02 -0000 1.3 +++ XMLUTF8Transcoder.cpp 20 Mar 2003 20:08:54 -0000 1.4 @@ -76,6 +76,16 @@ // gUTFBytes // A list of counts of trailing bytes for each initial byte in the input. // +// gUTFByteIndicator +// For a UTF8 sequence of n bytes, n>=2, the first byte of the +// sequence must contain n 1's followed by precisely 1 0 with the +// rest of the byte containing arbitrary bits. This array stores +// the required bit pattern for validity checking. +// gUTFByteIndicatorTest +// When bitwise and'd with the observed value, if the observed +// value is correct then a result matching gUTFByteIndicator will +// be produced. +// // gUTFOffsets // A list of values to offset each result char type, according to how // many source bytes when into making it. @@ -104,6 +114,15 @@ , 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; +static const XMLByte gUTFByteIndicator[6] = +{ + 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC +}; +static const XMLByte gUTFByteIndicatorTest[6] = +{ + 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE +}; + static const XMLUInt32 gUTFOffsets[6] = { 0, 0x3080, 0xE2080, 0x3C82080, 0xFA082080, 0x82082080 @@ -192,20 +211,48 @@ break; // Looks ok, so lets build up the value - XMLUInt32 tmpVal = 0; - switch(trailingBytes) - { - case 5 : tmpVal += *srcPtr++; tmpVal <<= 6; - case 4 : tmpVal += *srcPtr++; tmpVal <<= 6; - case 3 : tmpVal += *srcPtr++; tmpVal <<= 6; - case 2 : tmpVal += *srcPtr++; tmpVal <<= 6; - case 1 : tmpVal += *srcPtr++; tmpVal <<= 6; - case 0 : tmpVal += *srcPtr++; - break; + // or at least let's try to do so--remembering that + // we cannot assume the encoding to be valid: - default : - ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq); + // first, test first byte + if((gUTFByteIndicatorTest[trailingBytes] & *srcPtr) != gUTFByteIndicator[trailingBytes]) { + char pos[2] = {(char)0x31, 0}; + char len[2] = {(char)trailingBytes+0x31, 0}; + char byte[2] = {*srcPtr,0}; + ThrowXML3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, pos, byte, len); + } + + XMLUInt32 tmpVal = *srcPtr++; + tmpVal <<= 6; + for(unsigned int i=1; i<trailingBytes; i++) + { + if((*srcPtr & 0xC0) == 0x80) + { + tmpVal += *srcPtr++; + tmpVal <<= 6; + } + else + { + char len[2] = {(char)trailingBytes+0x31, 0}; + char pos[2]= {(char)i+0x31, 0}; + char byte[2] = {*srcPtr,0}; + ThrowXML3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, pos, byte, len); + } + } + if((*srcPtr & 0xC0) == 0x80) + { + tmpVal += *srcPtr++; + } + else + { + char len[2] = {(char)trailingBytes+0x31, 0}; + char byte[2] = {*srcPtr,0}; + ThrowXML3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, len, byte, len); } + // since trailingBytes comes from an array, this logic is redundant + // default : + // ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq); + //} tmpVal -= gUTFOffsets[trailingBytes]; //
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]