Thanks, Alon! This does indeed seem to be the issue. In library_syscall.js, 
the "st_size" member is considered an i32 (see below). I do not yet fully 
understand how C_STRUCTS is generated. I can see that compiler.js receives 
a JSON object STRUCT_INFO that contains the type definitions. Is this 
generated from the musl headers? 

doStat: function(func, path, buf) {
try {
var stat = func(path);
} catch (e) {
if (e && e.node && PATH.normalize(path) !== PATH.normalize(FS.getPath(e.node))) 
{
// an error occurred while trying to look up the path; we should just 
report ENOTDIR
return -ERRNO_CODES.ENOTDIR;
}
throw e;
}
{{{ makeSetValue('buf', C_STRUCTS.stat.st_dev, 'stat.dev', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.__st_dev_padding, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.__st_ino_truncated, 'stat.ino', 'i32') 
}}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_mode, 'stat.mode', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_nlink, 'stat.nlink', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_uid, 'stat.uid', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_gid, 'stat.gid', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_rdev, 'stat.rdev', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.__st_rdev_padding, '0', 'i32') }}};
*{{{ makeSetValue('buf', C_STRUCTS.stat.st_size, 'stat.size', 'i32') }}};*
{{{ makeSetValue('buf', C_STRUCTS.stat.st_blksize, '4096', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_blocks, 'stat.blocks', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_atim.tv_sec, '(stat.atime.getTime() 
/ 1000)|0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_atim.tv_nsec, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_mtim.tv_sec, '(stat.mtime.getTime() 
/ 1000)|0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_mtim.tv_nsec, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_ctim.tv_sec, '(stat.ctime.getTime() 
/ 1000)|0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_ctim.tv_nsec, '0', 'i32') }}};
{{{ makeSetValue('buf', C_STRUCTS.stat.st_ino, 'stat.ino', 'i32') }}};
return 0;
},
On Saturday, March 3, 2018 at 6:27:32 AM UTC+10, Alon Zakai wrote:
>
> It's possible the issue is that 64-bit integers are passed as two 32-bit 
> integers, in which case the fix is to receive/send those properly (maybe 
> only the low bits are received, for example). Building with LIBRARY_DEBUG=1 
> or SYSCALL_DEBUG=1 might help here, it will print out each call with 
> arguments and return value, so you can find which syscall is relevant.
>
> However, it's also possible the issue is that musl uses a 32-bit signed 
> integer for those syscalls, in which case the syscall interface would need 
> to be changed.
>
> On Thu, Mar 1, 2018 at 11:24 PM, Sören Balko <[email protected] 
> <javascript:>> wrote:
>
>> Hi,
>>
>> I have run into an issue where our code tries to read very large files 
>> (>2^31 bytes in size) and is effectively running into what looks like an 
>> integer overflow issue. What happens is that the int64_t members of stat_t 
>> ("size") and also the return value of llseek are implicitly down-cast into 
>> signed ints. Here is what we do to mount our file system (slightly 
>> simplified for brevity):
>>
>>          var node = Module.FS.createFile('/', emscriptenPath, null, 
>> true, true);
>>
>>     node.node_ops = {
>>         getattr: function(ganode) {
>>             return {
>>                 dev: 1,
>>                 ino: ganode.id,
>>                 mode: ganode.mode,
>>                 nlink: 1,
>>                 uid: 0,
>>                 gid: 0,
>>                 rdev: ganode.rdev,
>>                 size: size,  // <-- this is a file size > 2^31
>>                 atime: new Date(ganode.timestamp),
>>                 mtime: new Date(ganode.timestamp),
>>                 ctime: new Date(ganode.timestamp),
>>                 blksize: 4096,
>>                 blocks: Math.ceil(size / 4096)
>>             };
>>         }        
>>     };
>>
>>     node.stream_ops = {
>>         llseek: function(stream, offset, whence) {
>>             switch (whence) {
>>                 case 0: // SEEK_SET
>>                 stream.position = offset;
>>                 break;
>>                 case 1: // SEEK_CUR
>>                 stream.positon += offset;
>>                 break;
>>                 case 2: // SEEK_END
>>                 stream.position = size + offset;
>>                 break;
>>                 default:
>>                 throw new Module.FS.ErrnoError(22); // EINVAL
>>             }
>>
>>             return stream.position; // <-- can be > 2^31
>>         }, 
>>         read: function(stream, buffer, heapOffset, numberOfBytes, 
>> fileOffset) {
>>             // ...
>>         } 
>>     };
>>
>> I suspect that the issue arises from the fact that int64_t has no native 
>> counterpart in JS and is, hence, downcast in the interface between the 
>> asm.js and the file system code. Is there a quick fix to address this 
>> issue? I tried -s PRECISE_I64_MATH=2, but to no avail. Also, I am not 
>> entirely sure where exactly the precision is lost. I guess it happens in 
>> the __syscallXY functions for fstat and lseek (and probably also for the 
>> arguments passed into read). 
>>
>> One idea I had was to patch the syscalls in a way that I render the 
>> int64_t values as strings on the heap and pass back the pointer to that 
>> string inside the stat_t structure and the return value of llseek. These 
>> strings would then have to be parsed back into int64_t values inside the 
>> syscalls. Not exactly elegant, but it might work. Or is there a generic 
>> solution?
>>
>> Thanks heaps in advance for any suggestions...
>>
>> Soeren
>>
>> -- 
>> You received this message because you are subscribed to the Google Groups 
>> "emscripten-discuss" group.
>> To unsubscribe from this group and stop receiving emails from it, send an 
>> email to [email protected] <javascript:>.
>> For more options, visit https://groups.google.com/d/optout.
>>
>
>

-- 
You received this message because you are subscribed to the Google Groups 
"emscripten-discuss" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to