On Mon, Nov 06, 2017 at 07:35:55PM +0300, Joannah Nanjekye wrote: > Users tend to hit an ImportError when running analyze-migration.py due > to the numpy dependency. numpy functionality isn't actually used, just > binary serialization that the standard library 'struct' module already > provides. Removing the dependency allows the script to run > out-of-the-box. > > Signed-off-by: Joannah Nanjekye <nanjekyejoan...@gmail.com> > --- > scripts/analyze-migration.py | 23 +++++++++++++++-------- > 1 file changed, 15 insertions(+), 8 deletions(-) > > diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py > index 1455387..6175c99 100755 > --- a/scripts/analyze-migration.py > +++ b/scripts/analyze-migration.py > @@ -17,7 +17,6 @@ > # You should have received a copy of the GNU Lesser General Public > # License along with this library; if not, see > <http://www.gnu.org/licenses/>. > > -import numpy as np > import json > import os > import argparse > @@ -36,23 +35,29 @@ class MigrationFile(object): > self.file = open(self.filename, "rb") > > def read64(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0])
dtype='>i8' is a 64-bit (8 bytes) big-endian signed integer according to https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.dtypes.html#arrays-dtypes-constructing. > + buffer = file.read(64) This reads 64 bytes (not bits!). It should read 8 bytes. > + return struct.unpack('>i16', buffer)[0] This should be '>q' according to https://docs.python.org/2.7/library/struct.html#format-strings. > > def read32(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0]) > + buffer = file.read(32) read(4) > + return struct.unpack('>i8', buffer)[0] '>i' > > def read16(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0]) > + buffer = file.read(16) read(2) > + return struct.unpack('>i4', buffer)[0] '>h' > > def read8(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0]) > + buffer = file.read(8) read(1) > + return struct.unpack('>i2', buffer)[0] 'b' > > def readstr(self, len = None): > + read_format = str(len) + 'd' > if len is None: > len = self.read8() > if len == 0: > return "" > - return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0] > + buffer = file.read(8) > + return struct.unpack(read_format, buffer[0:(0 + > struct.calcsize(read_format))]) According to the numpy documentation 'S' produces raw bytes (not a unicode string). To get the raw bytes we just need file.read(len). The struct module isn't needed. Something like this should work: if len is None: len = self.read8() if len == 0: return "" return file.read(len).split(b'\0')[0] > > def readvar(self, size = None): > if size is None: > @@ -303,8 +308,10 @@ class VMSDFieldInt(VMSDFieldGeneric): > > def read(self): > super(VMSDFieldInt, self).read() > - self.sdata = np.fromstring(self.data, count=1, > dtype=(self.sdtype))[0] > - self.udata = np.fromstring(self.data, count=1, > dtype=(self.udtype))[0] > + buffer = file.read(self.data) This statement doesn't make sense. According to the Python documentation: read([size]) -> read at most size bytes, returned as a string. self.data *is* the buffer. There's no need to call file.read(). > + read_format = self.sdtype Unused. > + self.sdata = struct.unpack(self.sdtype, buffer[0:(0 + > struct.calcsize(self.sdtype))]) struct.calcsize() is unnecessary since self.size already contains the size in bytes. struct.unpack returns a tuple and we need the first element, so it should be: self.sdata = struct.unpack(...)[0] > + self.sdata = struct.unpack(self.udtype, buffer[0:(0 + > struct.calcsize(self.udtype))]) The data type specifiers are incorrect because the struct module uses different syntax than numpy: self.sdtype = '>i%d' % self.size self.udtype = '>u%d' % self.size The struct data type specifiers can be looked up like this: sdtypes = {1: 'b', 2: '>h', 4: '>i', 8: '>q'} udtypes = {1: 'B', 2: '>H', 4: '>I', 8: '>Q'} self.sdtype = sdtypes[self.size] self.udtype = udtypes[self.size]
signature.asc
Description: PGP signature