Title: [257134] trunk/Source/_javascript_Core
Revision
257134
Author
[email protected]
Date
2020-02-21 08:20:55 -0800 (Fri, 21 Feb 2020)

Log Message

Make support for bytecode caching more robust against file corruption.
https://bugs.webkit.org/show_bug.cgi?id=207972
<rdar://problem/59260595>

Reviewed by Yusuke Suzuki.

If a bytecode cache file is corrupted, we currently will always crash every time
we try to read it (in perpetuity as long as the corrupted cache file continues to
exist on disk).  To guard against this, we'll harden the bytecode caching mechanism
as follows:

1. Modify the writeCache operation to always write the cache file in a transactional
   manner i.e. we'll first write to a .tmp file, and then rename the .tmp file to
   the cache file only if the entire file has been written in completeness.

   This ensures that we won't get corrupted cache files due to interrupted writes.

2. Modify the writeCache operation to also compute a SHA1 hash of the cache file
   and append the hash at end of the file.  Modify the readCache operation to
   first authenticate the SHA1 hash before allowing the cache file to be used.
   If the hash does not match, the file is bad, and we'll just delete it.

   This ensures that we won't be crashing while decoding a corrupted cache file.

Manually tested with the following scenarios and ensuring that the client recovers
with no crashes:

1. no cache file on disk.
2. a 0-sized cache file on a disk.
3. a truncated cache file on disk.
4. a corrupted cache file on disk.
5. an uncorrupted cache file on disk.

Also added some static_asserts in CachedTypes.cpp to document some invariants that
the pre-existing code is dependent on.

* API/JSScript.mm:
(-[JSScript readCache]):
(-[JSScript writeCache:]):
* runtime/CachedTypes.cpp:

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/API/JSScript.mm (257133 => 257134)


--- trunk/Source/_javascript_Core/API/JSScript.mm	2020-02-21 12:38:18 UTC (rev 257133)
+++ trunk/Source/_javascript_Core/API/JSScript.mm	2020-02-21 16:20:55 UTC (rev 257134)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019 Apple Inc. All rights reserved.
+ * Copyright (C) 2019-2020 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -40,6 +40,7 @@
 #import <sys/stat.h>
 #import <wtf/FileMetadata.h>
 #import <wtf/FileSystem.h>
+#import <wtf/SHA1.h>
 #import <wtf/Scope.h>
 #import <wtf/WeakObjCPtr.h>
 #import <wtf/spi/darwin/DataVaultSPI.h>
@@ -153,7 +154,10 @@
     if (!m_cachePath)
         return;
 
-    auto fd = FileSystem::openAndLockFile([m_cachePath path].UTF8String, FileSystem::FileOpenMode::Read, {FileSystem::FileLockMode::Exclusive, FileSystem::FileLockMode::Nonblocking});
+    NSString *cachePathString = [m_cachePath path];
+    const char* cacheFilename = cachePathString.UTF8String;
+
+    auto fd = FileSystem::openAndLockFile(cacheFilename, FileSystem::FileOpenMode::Read, {FileSystem::FileLockMode::Exclusive, FileSystem::FileLockMode::Nonblocking});
     if (!FileSystem::isHandleValid(fd))
         return;
     auto closeFD = makeScopeExit([&] {
@@ -165,6 +169,30 @@
     if (!success)
         return;
 
+    const uint8_t* fileData = reinterpret_cast<const uint8_t*>(mappedFile.data());
+    unsigned fileTotalSize = mappedFile.size();
+
+    // Ensure we at least have a SHA1::Digest to read.
+    if (fileTotalSize < sizeof(SHA1::Digest)) {
+        FileSystem::deleteFile(cacheFilename);
+        return;
+    }
+
+    unsigned fileDataSize = fileTotalSize - sizeof(SHA1::Digest);
+
+    SHA1::Digest computedHash;
+    SHA1 sha1;
+    sha1.addBytes(fileData, fileDataSize);
+    sha1.computeHash(computedHash);
+
+    SHA1::Digest fileHash;
+    memcpy(&fileHash, fileData + fileDataSize, sizeof(SHA1::Digest));
+
+    if (computedHash != fileHash) {
+        FileSystem::deleteFile(cacheFilename);
+        return;
+    }
+
     Ref<JSC::CachedBytecode> cachedBytecode = JSC::CachedBytecode::create(WTFMove(mappedFile));
 
     JSC::VM& vm = *toJS([m_virtualMachine JSContextGroupRef]);
@@ -173,7 +201,7 @@
     if (isCachedBytecodeStillValid(vm, cachedBytecode.copyRef(), key, m_type == kJSScriptTypeProgram ? JSC::SourceCodeType::ProgramType : JSC::SourceCodeType::ModuleType))
         m_cachedBytecode = WTFMove(cachedBytecode);
     else
-        ftruncate(fd, 0);
+        FileSystem::truncateFile(fd, 0);
 }
 
 - (BOOL)cacheBytecodeWithError:(NSError **)error
@@ -266,24 +294,42 @@
         return NO;
     }
 
-    int fd = open([m_cachePath path].UTF8String, O_CREAT | O_RDWR | O_EXLOCK | O_NONBLOCK, 0666);
+    // We want to do the write as a transaction (i.e. we guarantee that it's all
+    // or nothing). So, we'll write to a temp file first, and rename the temp
+    // file to the cache file only after we've finished writing the whole thing.
+
+    NSString *cachePathString = [m_cachePath path];
+    const char* cacheFileName = cachePathString.UTF8String;
+    const char* tempFileName = [cachePathString stringByAppendingString:@".tmp"].UTF8String;
+    int fd = open(cacheFileName, O_CREAT | O_WRONLY | O_EXLOCK | O_NONBLOCK, 0600);
     if (fd == -1) {
         error = makeString("Could not open or lock the bytecode cache file. It's likely another VM or process is already using it. Error: ", strerror(errno));
         return NO;
     }
+
     auto closeFD = makeScopeExit([&] {
         close(fd);
     });
 
+    int tempFD = open(tempFileName, O_CREAT | O_RDWR | O_EXLOCK | O_NONBLOCK, 0600);
+    if (tempFD == -1) {
+        error = makeString("Could not open or lock the bytecode cache temp file. Error: ", strerror(errno));
+        return NO;
+    }
+
+    auto closeTempFD = makeScopeExit([&] {
+        close(tempFD);
+    });
+
     JSC::BytecodeCacheError cacheError;
     JSC::SourceCode sourceCode = [self sourceCode];
     JSC::VM& vm = *toJS([m_virtualMachine JSContextGroupRef]);
     switch (m_type) {
     case kJSScriptTypeModule:
-        m_cachedBytecode = JSC::generateModuleBytecode(vm, sourceCode, fd, cacheError);
+        m_cachedBytecode = JSC::generateModuleBytecode(vm, sourceCode, tempFD, cacheError);
         break;
     case kJSScriptTypeProgram:
-        m_cachedBytecode = JSC::generateProgramBytecode(vm, sourceCode, fd, cacheError);
+        m_cachedBytecode = JSC::generateProgramBytecode(vm, sourceCode, tempFD, cacheError);
         break;
     }
 
@@ -294,6 +340,14 @@
         return NO;
     }
 
+    SHA1::Digest computedHash;
+    SHA1 sha1;
+    sha1.addBytes(m_cachedBytecode->data(), m_cachedBytecode->size());
+    sha1.computeHash(computedHash);
+    FileSystem::writeToFile(tempFD, reinterpret_cast<const char*>(&computedHash), sizeof(computedHash));
+
+    fsync(tempFD);
+    rename(tempFileName, cacheFileName);
     return YES;
 }
 

Modified: trunk/Source/_javascript_Core/ChangeLog (257133 => 257134)


--- trunk/Source/_javascript_Core/ChangeLog	2020-02-21 12:38:18 UTC (rev 257133)
+++ trunk/Source/_javascript_Core/ChangeLog	2020-02-21 16:20:55 UTC (rev 257134)
@@ -1,3 +1,46 @@
+2020-02-20  Mark Lam  <[email protected]>
+
+        Make support for bytecode caching more robust against file corruption.
+        https://bugs.webkit.org/show_bug.cgi?id=207972
+        <rdar://problem/59260595>
+
+        Reviewed by Yusuke Suzuki.
+
+        If a bytecode cache file is corrupted, we currently will always crash every time
+        we try to read it (in perpetuity as long as the corrupted cache file continues to
+        exist on disk).  To guard against this, we'll harden the bytecode caching mechanism
+        as follows:
+
+        1. Modify the writeCache operation to always write the cache file in a transactional
+           manner i.e. we'll first write to a .tmp file, and then rename the .tmp file to
+           the cache file only if the entire file has been written in completeness.
+
+           This ensures that we won't get corrupted cache files due to interrupted writes.
+
+        2. Modify the writeCache operation to also compute a SHA1 hash of the cache file
+           and append the hash at end of the file.  Modify the readCache operation to
+           first authenticate the SHA1 hash before allowing the cache file to be used.
+           If the hash does not match, the file is bad, and we'll just delete it.
+
+           This ensures that we won't be crashing while decoding a corrupted cache file.
+
+        Manually tested with the following scenarios and ensuring that the client recovers
+        with no crashes:
+
+        1. no cache file on disk.
+        2. a 0-sized cache file on a disk.
+        3. a truncated cache file on disk.
+        4. a corrupted cache file on disk.
+        5. an uncorrupted cache file on disk.
+
+        Also added some static_asserts in CachedTypes.cpp to document some invariants that
+        the pre-existing code is dependent on.
+
+        * API/JSScript.mm:
+        (-[JSScript readCache]):
+        (-[JSScript writeCache:]):
+        * runtime/CachedTypes.cpp:
+
 2020-02-19  Ross Kirsling  <[email protected]>
 
         Computed Properties with increment sometimes produces incorrect results

Modified: trunk/Source/_javascript_Core/runtime/CachedTypes.cpp (257133 => 257134)


--- trunk/Source/_javascript_Core/runtime/CachedTypes.cpp	2020-02-21 12:38:18 UTC (rev 257133)
+++ trunk/Source/_javascript_Core/runtime/CachedTypes.cpp	2020-02-21 16:20:55 UTC (rev 257134)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019 Apple Inc. All rights reserved.
+ * Copyright (C) 2019-2020 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -2318,6 +2318,8 @@
     CachedCodeBlockTag m_tag;
 };
 
+static_assert(alignof(GenericCacheEntry) <= alignof(std::max_align_t));
+
 template<typename UnlinkedCodeBlockType>
 class CacheEntry : public GenericCacheEntry {
 public:
@@ -2355,6 +2357,9 @@
     CachedPtr<CachedCodeBlockType<UnlinkedCodeBlockType>> m_codeBlock;
 };
 
+static_assert(alignof(CacheEntry<UnlinkedProgramCodeBlock>) <= alignof(std::max_align_t));
+static_assert(alignof(CacheEntry<UnlinkedModuleProgramCodeBlock>) <= alignof(std::max_align_t));
+
 bool GenericCacheEntry::decode(Decoder& decoder, std::pair<SourceCodeKey, UnlinkedCodeBlock*>& result) const
 {
     if (!isUpToDate(decoder))
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to