Re: [edk2-devel] [Patch V3] BaseTools: Library hashing fix and optimization for --hash feature

2019-05-20 Thread Bob Feng
This patch is good to me.

Reviewed-by: Bob Feng 

-Original Message-
From: Rodriguez, Christian 
Sent: Monday, May 20, 2019 10:18 PM
To: devel@edk2.groups.io
Cc: Feng, Bob C ; Gao, Liming ; 
Zhu, Yonghong 
Subject: [Patch V3] BaseTools: Library hashing fix and optimization for --hash 
feature

BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1788

In V3: Must generate hashes before attempting to copy from cache for hash 
verifcation In V2: Build failure caused by passing incorrect boolean parameter 
to SaveFileOnChange(). Fixed for patch instances.

Library hashing is now supported by the --hash feature. The --hash feature 
implementation assumed that the hashing could be done in place once per module, 
but that isn't true for libraries due to the fact that they are built as 
dependencies. So on a clean build, we now generate the .hash after the library 
dependencies are complete.
Added early escape as optimization, if hash already exists in memory.

Signed-off-by: Christian Rodriguez 
Cc: Bob Feng 
Cc: Liming Gao 
Cc: Yonghong Zhu 
---
 BaseTools/Source/Python/AutoGen/AutoGen.py   | 53 
-
 BaseTools/Source/Python/Common/GlobalData.py |  6 ++
 BaseTools/Source/Python/build/build.py   |  7 ++-
 3 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/BaseTools/Source/Python/AutoGen/AutoGen.py 
b/BaseTools/Source/Python/AutoGen/AutoGen.py
index 31721a6f9f..81efc4f06c 100644
--- a/BaseTools/Source/Python/AutoGen/AutoGen.py
+++ b/BaseTools/Source/Python/AutoGen/AutoGen.py
@@ -3935,6 +3935,7 @@ class ModuleAutoGen(AutoGen):
 f = open(HashFile, 'r')
 CacheHash = f.read()
 f.close()
+self.GenModuleHash()
 if GlobalData.gModuleHash[self.Arch][self.Name]:
 if CacheHash == GlobalData.gModuleHash[self.Arch][self.Name]:
 for root, dir, files in os.walk(FileDir):
@@ -4093,13 +4094,20 @@ class ModuleAutoGen(AutoGen):
 return RetVal
 
 def GenModuleHash(self):
+# Initialize a dictionary for each arch type
 if self.Arch not in GlobalData.gModuleHash:
 GlobalData.gModuleHash[self.Arch] = {}
-if self.Name in GlobalData.gModuleHash[self.Arch] and 
GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-return False
+
+# Early exit if module or library has been hashed and is in memory
+if self.Name in GlobalData.gModuleHash[self.Arch]:
+return 
+ GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
+
+# Initialze hash object
 m = hashlib.md5()
+
 # Add Platform level hash
 m.update(GlobalData.gPlatformHash.encode('utf-8'))
+
 # Add Package level hash
 if self.DependentPackageList:
 for Pkg in sorted(self.DependentPackageList, key=lambda x: 
x.PackageName):
@@ -4118,6 +4126,7 @@ class ModuleAutoGen(AutoGen):
 Content = f.read()
 f.close()
 m.update(Content)
+
 # Add Module's source files
 if self.SourceFileList:
 for File in sorted(self.SourceFileList, key=lambda x: str(x)):
@@ -4126,27 +4135,45 @@ class ModuleAutoGen(AutoGen):
 f.close()
 m.update(Content)
 
-ModuleHashFile = path.join(self.BuildDir, self.Name + ".hash")
-if self.Name not in GlobalData.gModuleHash[self.Arch]:
-GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
-if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-return False
-return SaveFileOnChange(ModuleHashFile, m.hexdigest(), False)
+GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
+
+return 
+ GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
 
 ## Decide whether we can skip the ModuleAutoGen process
 def CanSkipbyHash(self):
+# Hashing feature is off
+if not GlobalData.gUseHashCache:
+return False
+
+# Initialize a dictionary for each arch type
+if self.Arch not in GlobalData.gBuildHashSkipTracking:
+GlobalData.gBuildHashSkipTracking[self.Arch] = dict()
+
 # If library or Module is binary do not skip by hash
 if self.IsBinaryModule:
 return False
+
 # .inc is contains binary information so do not skip by hash as well
 for f_ext in self.SourceFileList:
 if '.inc' in str(f_ext):
 return False
-if GlobalData.gUseHashCache:
-# If there is a valid hash or function generated a valid hash; 
function will return False
-# and the statement below will return True
-return not self.GenModuleHash()
-return False
+
+# Use Cache, if exists and if Module has a copy in cache
+if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
+return True
+
+# Early 

[edk2-devel] [Patch V3] BaseTools: Library hashing fix and optimization for --hash feature

2019-05-20 Thread Christian Rodriguez
BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1788

In V3: Must generate hashes before attempting to copy from cache for
hash verifcation
In V2: Build failure caused by passing incorrect boolean parameter to
SaveFileOnChange(). Fixed for patch instances.

Library hashing is now supported by the --hash feature. The --hash
feature implementation assumed that the hashing could be done in
place once per module, but that isn't true for libraries due to the
fact that they are built as dependencies. So on a clean build, we now
generate the .hash after the library dependencies are complete.
Added early escape as optimization, if hash already exists in memory.

Signed-off-by: Christian Rodriguez 
Cc: Bob Feng 
Cc: Liming Gao 
Cc: Yonghong Zhu 
---
 BaseTools/Source/Python/AutoGen/AutoGen.py   | 53 
-
 BaseTools/Source/Python/Common/GlobalData.py |  6 ++
 BaseTools/Source/Python/build/build.py   |  7 ++-
 3 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/BaseTools/Source/Python/AutoGen/AutoGen.py 
b/BaseTools/Source/Python/AutoGen/AutoGen.py
index 31721a6f9f..81efc4f06c 100644
--- a/BaseTools/Source/Python/AutoGen/AutoGen.py
+++ b/BaseTools/Source/Python/AutoGen/AutoGen.py
@@ -3935,6 +3935,7 @@ class ModuleAutoGen(AutoGen):
 f = open(HashFile, 'r')
 CacheHash = f.read()
 f.close()
+self.GenModuleHash()
 if GlobalData.gModuleHash[self.Arch][self.Name]:
 if CacheHash == GlobalData.gModuleHash[self.Arch][self.Name]:
 for root, dir, files in os.walk(FileDir):
@@ -4093,13 +4094,20 @@ class ModuleAutoGen(AutoGen):
 return RetVal
 
 def GenModuleHash(self):
+# Initialize a dictionary for each arch type
 if self.Arch not in GlobalData.gModuleHash:
 GlobalData.gModuleHash[self.Arch] = {}
-if self.Name in GlobalData.gModuleHash[self.Arch] and 
GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-return False
+
+# Early exit if module or library has been hashed and is in memory
+if self.Name in GlobalData.gModuleHash[self.Arch]:
+return GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
+
+# Initialze hash object
 m = hashlib.md5()
+
 # Add Platform level hash
 m.update(GlobalData.gPlatformHash.encode('utf-8'))
+
 # Add Package level hash
 if self.DependentPackageList:
 for Pkg in sorted(self.DependentPackageList, key=lambda x: 
x.PackageName):
@@ -4118,6 +4126,7 @@ class ModuleAutoGen(AutoGen):
 Content = f.read()
 f.close()
 m.update(Content)
+
 # Add Module's source files
 if self.SourceFileList:
 for File in sorted(self.SourceFileList, key=lambda x: str(x)):
@@ -4126,27 +4135,45 @@ class ModuleAutoGen(AutoGen):
 f.close()
 m.update(Content)
 
-ModuleHashFile = path.join(self.BuildDir, self.Name + ".hash")
-if self.Name not in GlobalData.gModuleHash[self.Arch]:
-GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
-if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-return False
-return SaveFileOnChange(ModuleHashFile, m.hexdigest(), False)
+GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
+
+return GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
 
 ## Decide whether we can skip the ModuleAutoGen process
 def CanSkipbyHash(self):
+# Hashing feature is off
+if not GlobalData.gUseHashCache:
+return False
+
+# Initialize a dictionary for each arch type
+if self.Arch not in GlobalData.gBuildHashSkipTracking:
+GlobalData.gBuildHashSkipTracking[self.Arch] = dict()
+
 # If library or Module is binary do not skip by hash
 if self.IsBinaryModule:
 return False
+
 # .inc is contains binary information so do not skip by hash as well
 for f_ext in self.SourceFileList:
 if '.inc' in str(f_ext):
 return False
-if GlobalData.gUseHashCache:
-# If there is a valid hash or function generated a valid hash; 
function will return False
-# and the statement below will return True
-return not self.GenModuleHash()
-return False
+
+# Use Cache, if exists and if Module has a copy in cache
+if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
+return True
+
+# Early exit for libraries that haven't yet finished building
+HashFile = path.join(self.BuildDir, self.Name + ".hash")
+if self.IsLibrary and not os.path.exists(HashFile):
+return False
+
+# Return a Boolean based on if can skip by hash, either from memory or 
from IO.
+if