With .uni UTF-16 files, it is impossible for Unicode code points to be larger than 0xFFFF.
To support .utf8 UTF-8 Unicode files, we also need to deal with the possibility that the UTF-8 file contains Unicode code points larger than 0xFFFF, which do not fit in 16 bits. Since UEFI only supports 16-bit string data, we make UniClassObject generate an error if a larger code point is seen in a UTF-8 string value. We only check string value data, so it is possible to use larger code points in comments. Cc: Yingke D Liu <yingke.d....@intel.com> Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> --- BaseTools/Source/Python/AutoGen/UniClassObject.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/BaseTools/Source/Python/AutoGen/UniClassObject.py b/BaseTools/Source/Python/AutoGen/UniClassObject.py index df30c48..fef7135 100644 --- a/BaseTools/Source/Python/AutoGen/UniClassObject.py +++ b/BaseTools/Source/Python/AutoGen/UniClassObject.py @@ -259,6 +259,14 @@ class UniFileClassObject(object): self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1 return True + def Verify16bitCodePoints(self, String): + for cp in String: + if ord(cp) > 0xffff: + tmpl = 'The string {} defined in file {} ' + \ + 'contains a character with a code point above 0xFFFF.' 
+ error = tmpl.format(repr(String), self.File) + EdkLogger.error('Unicode File Parser', FORMAT_INVALID, error) + # # Get String name and value # @@ -280,6 +288,7 @@ class UniFileClassObject(object): Language = LanguageList[IndexI].split()[0] Value = LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : LanguageList[IndexI].rfind(u'\"')] #.replace(u'\r\n', u'') Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File) + self.Verify16bitCodePoints(Value) self.AddStringToList(Name, Language, Value) # @@ -433,6 +442,7 @@ class UniFileClassObject(object): MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE) if MatchString == None or MatchString.end(0) != len(Name): EdkLogger.error('Unicode File Parser', FORMAT_INVALID, 'The string token name %s defined in UNI file %s contains the invalid lower case character.' %(Name, self.File)) + self.Verify16bitCodePoints(Value) self.AddStringToList(Name, Language, Value) continue -- 2.1.4 ------------------------------------------------------------------------------ One dashboard for servers and applications across Physical-Virtual-Cloud Widest out-of-the-box monitoring support with 50+ applications Performance metrics, stats and reports that give you Actionable Insights Deep dive visibility with transaction tracing using APM Insight. http://ad.doubleclick.net/ddm/clk/290420510;117567292;y _______________________________________________ edk2-devel mailing list edk2-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/edk2-devel