Thanks, Alon! This does indeed seem to be the issue. In library_syscall.js,
the "st_size" member is considered an i32 (see below). I do not yet fully
understand how C_STRUCTS is generated. I can see that compiler.js receives
a JSON object STRUCT_INFO that contains the type definitions. Is this
generated from the musl headers?
doStat: function(func, path, buf) {
try {
var stat = func(path);
} catch (e) {
if (e && e.node && PATH.normalize(path) !== PATH.normalize(FS.getPath(e.node)))
{
// an error occurred while trying to look up the path; we should just
report ENOTDIR
return -ERRNO_CODES.ENOTDIR;
}
throw e;
}
{{{ makeSetValue('buf', C_STRUCTS.stat.st_dev, 'stat.dev', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.__st_dev_padding, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.__st_ino_truncated, 'stat.ino', 'i32')
}}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_mode, 'stat.mode', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_nlink, 'stat.nlink', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_uid, 'stat.uid', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_gid, 'stat.gid', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_rdev, 'stat.rdev', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.__st_rdev_padding, '0', 'i32') }}};
*{{{ makeSetValue('buf', C_STRUCTS.stat.st_size, 'stat.size', 'i32') }}};*
{{{ makeSetValue('buf', C_STRUCTS.stat.st_blksize, '4096', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_blocks, 'stat.blocks', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_atim.tv_sec, '(stat.atime.getTime()
/ 1000)|0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_atim.tv_nsec, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_mtim.tv_sec, '(stat.mtime.getTime()
/ 1000)|0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_mtim.tv_nsec, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_ctim.tv_sec, '(stat.ctime.getTime()
/ 1000)|0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_ctim.tv_nsec, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_ino, 'stat.ino', 'i32') }}};
return 0;
},
On Saturday, March 3, 2018 at 6:27:32 AM UTC+10, Alon Zakai wrote:
>
> It's possible the issue is that 64-bit integers are passed as two 32-bit
> integers, in which case the fix is to receive/send those properly (maybe
> only the low bits are received, for example). Building with LIBRARY_DEBUG=1
> or SYSCALL_DEBUG=1 might help here, it will print out each call with
> arguments and return value, so you can find which syscall is relevant.
>
> However, it's also possible the issue is that musl uses a 32-bit signed
> integer for those syscalls, in which case the syscall interface would need
> to be changed.
>
> On Thu, Mar 1, 2018 at 11:24 PM, Sören Balko <[email protected]
> <javascript:>> wrote:
>
>> Hi,
>>
>> I have run into an issue where our code tries to read very large files
>> (>2^31 bytes in size) and is effectively running into what looks like an
>> integer overflow issue. What happens is that the int64_t members of stat_t
>> ("size") and also the return value of llseek are implicitly down-cast into
>> signed ints. Here is what we do to mount our file system (slightly
>> simplified for brevity):
>>
>> var node = Module.FS.createFile('/', emscriptenPath, null,
>> true, true);
>>
>> node.node_ops = {
>> getattr: function(ganode) {
>> return {
>> dev: 1,
>> ino: ganode.id,
>> mode: ganode.mode,
>> nlink: 1,
>> uid: 0,
>> gid: 0,
>> rdev: ganode.rdev,
>> size: size, // <-- this is a file size > 2^31
>> atime: new Date(ganode.timestamp),
>> mtime: new Date(ganode.timestamp),
>> ctime: new Date(ganode.timestamp),
>> blksize: 4096,
>> blocks: Math.ceil(size / 4096)
>> };
>> }
>> };
>>
>> node.stream_ops = {
>> llseek: function(stream, offset, whence) {
>> switch (whence) {
>> case 0: // SEEK_SET
>> stream.position = offset;
>> break;
>> case 1: // SEEK_CUR
>> stream.positon += offset;
>> break;
>> case 2: // SEEK_END
>> stream.position = size + offset;
>> break;
>> default:
>> throw new Module.FS.ErrnoError(22); // EINVAL
>> }
>>
>> return stream.position; // <-- can be > 2^31
>> },
>> read: function(stream, buffer, heapOffset, numberOfBytes,
>> fileOffset) {
>> // ...
>> }
>> };
>>
>> I suspect that the issue arises from the fact that int64_t has no native
>> counterpart in JS and is, hence, downcast in the interface between the
>> asm.js and the file system code. Is there a quick fix to address this
>> issue? I tried -s PRECISE_I64_MATH=2, but to no avail. Also, I am not
>> entirely sure where exactly the precision is lost. I guess, it happens in
>> the __syscallXY functions for fstat, lseek (and probably also for the
>> arguments passed into read).
>>
>> One idea I had was to patch the syscalls in a way that I render the
>> int64_t values as strings on the heap and pass back the pointer to that
>> string inside the stat_t structure and the return value of llseek. These
>> strings would then have to be parsed back into int64_t values inside the
>> syscalls. Not exactly elegant, but it might work. Or is there a generic
>> solution?
>>
>> Thanks heaps in advance for any suggestions...
>>
>> Soeren
>>
>> --
>> You received this message because you are subscribed to the Google Groups
>> "emscripten-discuss" group.
>> To unsubscribe from this group and stop receiving emails from it, send an
>> email to [email protected] <javascript:>.
>> For more options, visit https://groups.google.com/d/optout.
>>
>
>
--
You received this message because you are subscribed to the Google Groups
"emscripten-discuss" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.