commit 098101ef5b9c23177da511d287591f491f550884
Author: Jonathan Schleifer <js@webkeks.org>
Date:   Sun Nov 24 14:29:28 2013 +0100

    Add OFString as a format string type.
    
    For formats that are an OFString, %C and %S are now checked against
    of_unichar_t and const of_unichar_t * respectively, similar to how unichar
    is used when the format is an NSString.

diff --git a/include/clang/Analysis/Analyses/FormatString.h b/include/clang/Analysis/Analyses/FormatString.h
index c9516b5..3659538 100644
--- a/include/clang/Analysis/Analyses/FormatString.h
+++ b/include/clang/Analysis/Analyses/FormatString.h
@@ -20,6 +20,7 @@
 #define LLVM_CLANG_FORMAT_H
 
 #include "clang/AST/CanonicalType.h"
+#include "clang/Sema/Sema.h"
 
 namespace clang {
 
@@ -483,7 +484,7 @@ public:
   /// will return null if the format specifier does not have
   /// a matching data argument or the matching argument matches
   /// more than one type.
-  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
+  ArgType getArgType(ASTContext &Ctx, Sema::FormatStringType FSType) const;
 
   const OptionalFlag &hasThousandsGrouping() const {
       return HasThousandsGrouping;
@@ -499,7 +500,7 @@ public:
   /// flags or options. Returns true on success, or false when a conversion
   /// was not successful.
   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
-               bool IsObjCLiteral);
+               Sema::FormatStringType FSType);
 
   void toString(raw_ostream &os) const;
 
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index ed28583..581c6f4 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -7836,6 +7836,7 @@ public:
     FST_Scanf,
     FST_Printf,
     FST_NSString,
+    FST_OFString,
     FST_Strftime,
     FST_Strfmon,
     FST_Kprintf,
diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp
index f21b407..5c1d6a9 100644
--- a/lib/Analysis/PrintfFormatString.cpp
+++ b/lib/Analysis/PrintfFormatString.cpp
@@ -258,7 +258,7 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
 //===----------------------------------------------------------------------===//
 
 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
-                                    bool IsObjCLiteral) const {
+                                    Sema::FormatStringType FSType) const {
   const PrintfConversionSpecifier &CS = getConversionSpecifier();
 
   if (!CS.consumesDataArgument())
@@ -379,20 +379,25 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
   switch (CS.getKind()) {
     case ConversionSpecifier::sArg:
       if (LM.getKind() == LengthModifier::AsWideChar) {
-        if (IsObjCLiteral)
+        if (FSType == Sema::FST_NSString)
           return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
                          "const unichar *");
         return ArgType(ArgType::WCStrTy, "wchar_t *");
       }
       return ArgType::CStrTy;
     case ConversionSpecifier::SArg:
-      if (IsObjCLiteral)
+      if (FSType == Sema::FST_NSString)
         return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
                        "const unichar *");
+      if (FSType == Sema::FST_OFString)
+        return ArgType(Ctx.getPointerType(Ctx.Char32Ty.withConst()),
+                       "const of_unichar_t *");
       return ArgType(ArgType::WCStrTy, "wchar_t *");
     case ConversionSpecifier::CArg:
-      if (IsObjCLiteral)
+      if (FSType == Sema::FST_NSString)
         return ArgType(Ctx.UnsignedShortTy, "unichar");
+      if (FSType == Sema::FST_OFString)
+        return ArgType(Ctx.Char32Ty, "of_unichar_t");
       return ArgType(Ctx.WideCharTy, "wchar_t");
     case ConversionSpecifier::pArg:
       return ArgType::CPointerTy;
@@ -407,7 +412,7 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
 }
 
 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
-                              ASTContext &Ctx, bool IsObjCLiteral) {
+                              ASTContext &Ctx, Sema::FormatStringType FSType) {
   // %n is different from other conversion specifiers; don't try to fix it.
   if (CS.getKind() == ConversionSpecifier::nArg)
     return false;
@@ -417,7 +422,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
   // how CoreFoundation objects are implemented), we only show a fixit for '%@'
   // if we know it's an object (block, id, class, or __attribute__((NSObject))).
   if (QT->isObjCRetainableType()) {
-    if (!IsObjCLiteral)
+    if (FSType != Sema::FST_NSString && FSType != Sema::FST_OFString)
       return false;
 
     CS.setKind(ConversionSpecifier::ObjCObjArg);
@@ -540,7 +545,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
       break;
     }
 
-    const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
+    const analyze_printf::ArgType &ATR = getArgType(Ctx, FSType);
     if (ATR.isValid() && ATR.matchesType(Ctx, QT))
       return true;
   }
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 0b95c48..322edac 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -2208,6 +2208,7 @@ Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) {
   .Case("scanf", FST_Scanf)
   .Cases("printf", "printf0", FST_Printf)
   .Cases("NSString", "CFString", FST_NSString)
+  .Case("OFString", FST_OFString)
   .Case("strftime", FST_Strftime)
   .Case("strfmon", FST_Strfmon)
   .Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf)
@@ -2274,7 +2275,7 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
   // format is either NSString or CFString. This is a hack to prevent
   // diag when using the NSLocalizedString and CFCopyLocalizedString macros
   // which are usually used in place of NS and CF string literals.
-  if (Type == FST_NSString &&
+  if ((Type == FST_NSString || Type == FST_OFString) &&
       SourceMgr.isInSystemMacro(Args[format_idx]->getLocStart()))
     return false;
 
@@ -2707,11 +2708,12 @@ void CheckFormatHandler::EmitFormatDiagnostic(Sema &S, bool InFunctionCall,
 
 namespace {
 class CheckPrintfHandler : public CheckFormatHandler {
+  Sema::FormatStringType FSType;
   bool ObjCContext;
 public:
   CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
                      const Expr *origFormatExpr, unsigned firstDataArg,
-                     unsigned numDataArgs, bool isObjC,
+                     unsigned numDataArgs, Sema::FormatStringType Type,
                      const char *beg, bool hasVAListArg,
                      ArrayRef<const Expr *> Args,
                      unsigned formatIdx, bool inFunctionCall,
@@ -2720,8 +2722,10 @@ public:
     : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
                          numDataArgs, beg, hasVAListArg, Args,
                          formatIdx, inFunctionCall, CallType, CheckedVarArgs),
-      ObjCContext(isObjC)
-  {}
+      FSType(Type)
+  {
+    ObjCContext = (Type == Sema::FST_NSString || Type == Sema::FST_OFString);
+  }
 
   
   bool HandleInvalidPrintfConversionSpecifier(
@@ -3094,7 +3098,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
   // Now type check the data expression that matches the
   // format specifier.
   const analyze_printf::ArgType &AT = FS.getArgType(S.Context,
-                                                    ObjCContext);
+                                                    FSType);
   if (!AT.isValid())
     return true;
 
@@ -3145,7 +3149,8 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
         !ExprTy->isCharType()) {
       // 'unichar' is defined as a typedef of unsigned short, but we should
       // prefer using the typedef if it is visible.
-      IntendedTy = S.Context.UnsignedShortTy;
+      IntendedTy = (FSType == Sema::FST_OFString ?
+                    S.Context.Char32Ty : S.Context.UnsignedShortTy);
 
       // While we are here, check if the value is an IntegerLiteral that happens
       // to be within the valid range.
@@ -3193,7 +3198,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
   // We may be able to offer a FixItHint if it is a supported type.
   PrintfSpecifier fixedFS = FS;
   bool success = fixedFS.fixType(IntendedTy, S.getLangOpts(),
-                                 S.Context, ObjCContext);
+                                 S.Context, FSType);
 
   if (success) {
     // Get the fix string from the fixed format specifier
@@ -3532,10 +3537,9 @@ void Sema::CheckFormatString(const StringLiteral *FExpr,
     return;
   }
   
-  if (Type == FST_Printf || Type == FST_NSString) {
+  if (Type == FST_Printf || Type == FST_NSString || Type == FST_OFString) {
     CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
-                         numDataArgs, (Type == FST_NSString),
-                         Str, HasVAListArg, Args, format_idx,
+                         numDataArgs, Type, Str, HasVAListArg, Args, format_idx,
                          inFunctionCall, CallType, CheckedVarArgs);
   
     if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp
index 6f88443..8942821 100644
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -194,6 +194,23 @@ static inline bool isNSStringType(QualType T, ASTContext &Ctx) {
          ClsName == &Ctx.Idents.get("NSMutableString");
 }
 
+/// True if \p T is a pointer to OFString, OFConstantString or OFMutableString.
+static inline bool isOFStringType(QualType T, ASTContext &Ctx) {
+  const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>();
+  if (!PT) // T is not an Objective-C object pointer type.
+    return false;
+
+  ObjCInterfaceDecl *Cls = PT->getObjectType()->getInterface();
+  if (!Cls) // No interface declaration to take a class name from.
+    return false;
+
+  IdentifierInfo* ClsName = Cls->getIdentifier();
+  // FIXME: Only exact names match. Should we walk the chain of classes?
+  return ClsName == &Ctx.Idents.get("OFString") ||
+         ClsName == &Ctx.Idents.get("OFConstantString") ||
+         ClsName == &Ctx.Idents.get("OFMutableString");
+}
+
 static inline bool isCFStringType(QualType T, ASTContext &Ctx) {
   const PointerType *PT = T->getAs<PointerType>();
   if (!PT)
@@ -3002,6 +3019,7 @@ static void handleFormatArgAttr(Sema &S, Decl *D, const AttributeList &Attr) {
 enum FormatAttrKind {
   CFStringFormat,
   NSStringFormat,
+  OFStringFormat,
   StrftimeFormat,
   SupportedFormat,
   IgnoredFormat,
@@ -3014,6 +3032,7 @@ static FormatAttrKind getFormatAttrKind(StringRef Format) {
   return llvm::StringSwitch<FormatAttrKind>(Format)
     // Check for formats that get handled specially.
     .Case("NSString", NSStringFormat)
+    .Case("OFString", OFStringFormat)
     .Case("CFString", CFStringFormat)
     .Case("strftime", StrftimeFormat)
 
@@ -3188,6 +3207,15 @@ static void handleFormatAttr(Sema &S, Decl *D, const AttributeList &Attr) {
         << "an NSString" << IdxExpr->getSourceRange();
       return;
     }
+  } else if (Kind == OFStringFormat) {
+    // FIXME: do we need to check if the type is OFString*?  What are the
+    // semantics?
+    if (!isOFStringType(Ty, S.Context)) {
+      // FIXME: Should highlight the actual expression that has the wrong type.
+      S.Diag(Attr.getLoc(), diag::err_format_attribute_not)
+        << "an OFString" << IdxExpr->getSourceRange();
+      return;
+    }
   } else if (!Ty->isPointerType() ||
              !Ty->getAs<PointerType>()->getPointeeType()->isCharType()) {
     // FIXME: Should highlight the actual expression that has the wrong type.
