I'm working on some Julia code to read XDR data/files (something that might
be generated from R using save).
I have a function, read_attribute_list, that reads the attributes associated
with an R object and returns them as a Dict. I also have a function,
read_object, that reads an R object and calls read_attribute_list.
# Read the attribute list of an R object from the stream behind `rr`.
#
# Entries are consumed for as long as the next type tag is a LISTSXP. For each
# entry the tag is read with read_symbol_or_string (only when the tag flag is
# set) and the value with read_object. Values that come back as None are
# dropped.
#
# Returns a Dict{String,RObject} mapping each tag to its value.
#
# Raises an error when a non-None value has no tag, or when the tag that ends
# the list is neither EMPTYTAIL nor STRSXP.
function read_attribute_list(rr::RReader)
    result = Dict{String,RObject}()
    typeTag = get_type_tag(rr)
    defaultRType = RType()
    while getRType(typeTag) == defaultRType.LISTSXP
        tag = getTagFlag(typeTag) ? read_symbol_or_string(rr) : None
        car = read_object(rr)
        if car != None
            # Without a tag there is no String key to store the value under;
            # fail with a clear message instead of letting the Dict insertion
            # fail while trying to convert None into a String key.
            if tag == None
                error("Attribute list entry has a value but no tag")
            end
            result[tag] = car
        end
        typeTag = get_type_tag(rr)
    end
    terminator = getRType(typeTag)
    if terminator != defaultRType.EMPTYTAIL && terminator != defaultRType.STRSXP
        error("Not Implemented")
    end
    # Author's note: result is still a Dict here and prints correctly right
    # before this return.
    return result
end
# Read a single R object from the stream behind `rr`.
#
# The type tag obtained from get_type_tag selects how the payload is decoded:
#   LGLSXP / INTSXP / REALSXP / CPLXSXP / STRSXP -> a length followed by that
#       many elements, stored in a data array with NA for missing entries
#   VECSXP    -> a length followed by that many nested objects (recursive)
#   EXTPTRSXP -> one Int64 is consumed and the result is None
#   RAWSXP    -> a length followed by that many raw bytes
#   LANGSXP   -> an RObject wrapping None, with an attribute list attached
#   SYMSXP    -> the symbol is recorded in rr.symbols and returned right away
# Any other type raises "Unexpected type".
#
# For every branch except SYMSXP, an attribute list is read and attached to
# the result when the attribute flag of the type tag is set.
function read_object(rr::RReader)
    typeTag = get_type_tag(rr)
    result = None
    rType = getRType(typeTag)
    kinds = RType()

    if rType == kinds.LGLSXP
        len = readInt32(rr.reader)
        flags = @data(bitunpack(falses(len)))
        for i = 1:len
            raw = read_integer(rr)
            if raw == 0 || raw == 1
                flags[i] = raw != 0
            elseif raw == None
                flags[i] = NA
            else
                error("Invalid boolean flag from XDR: $(raw)")
            end
        end
        result = RObject(flags)

    elseif rType == kinds.INTSXP
        len = readInt32(rr.reader)
        ints = @data(zeros(Int32, len))
        for i = 1:len
            item = read_integer(rr)
            ints[i] = item == None ? NA : item
        end
        result = RObject(ints)

    elseif rType == kinds.REALSXP
        len = readInt32(rr.reader)
        reals = @data(zeros(Float64, len))
        for i = 1:len
            item = readFloat64(rr.reader)
            # A NaN read from the stream is stored as NA.
            reals[i] = isnan(item) ? NA : item
        end
        result = RObject(reals)

    elseif rType == kinds.CPLXSXP
        len = readInt32(rr.reader)
        complexes = @data(zeros(Complex128, len))
        for i = 1:len
            # Both parts are always consumed, real part first.
            re = readFloat64(rr.reader)
            im = readFloat64(rr.reader)
            if isnan(re) || isnan(im)
                complexes[i] = NA
            else
                complexes[i] = complex128(re, im)
            end
        end
        result = RObject(complexes)

    elseif rType == kinds.STRSXP
        len = readInt32(rr.reader)
        strings = @data(fill("", len))
        for i = 1:len
            item = read_symbol_or_string(rr)
            if item != None
                strings[i] = item
            else
                strings[i] = NA
            end
        end
        result = RObject(strings)

    elseif rType == kinds.VECSXP
        len = readInt32(rr.reader)
        elements = @data(fill!(Array(Any, len), None))
        for i = 1:len
            elements[i] = read_object(rr)
        end
        result = RObject(elements)

    elseif rType == kinds.EXTPTRSXP
        # The Int64 is consumed and discarded.
        readInt64(rr.reader)
        result = None

    elseif rType == kinds.RAWSXP
        byteCount = readInt32(rr.reader)
        bytes = readbytes(rr.reader, byteCount)
        result = RObject(bytes, is_raw = true)

    elseif rType == kinds.LANGSXP
        result = RObject(None)
        # Author's note: this is the call whose return value shows up as
        # nothing instead of the Dict built inside read_attribute_list.
        att_list = read_attribute_list(rr)
        setAttributes(result, att_list)

    elseif rType == kinds.SYMSXP
        sym = read_symbol_or_string(rr)
        push!(rr.symbols, sym)
        # Early return: the attribute handling below is skipped for symbols.
        return RObject([sym])

    else
        error("Unexpected type")
    end

    if getAttributeFlag(typeTag)
        setAttributes(result, read_attribute_list(rr))
    end
    return result
end
When read_attribute_list is called, the attributes are read correctly, and
the Dict can be viewed by printing it right before the return statement.
However, at the commented line in read_object (the read_attribute_list call
in the LANGSXP branch), the att_list that comes back is nothing rather than
the Dict that was visible just before the return.
I can't figure out what would be causing this bug. Some things that I have
tried that might be helpful:
1) Turn off gc(). I disable garbage collection and run the code. The
behavior remains the same and nothing is returned.
2) Return a list of tuples instead. Instead of returning a Dict, I return
Array{(String,RObject)} to serialize the Dict. The behavior remains the
same and nothing is returned.
3) Reproduce the bug outside of the module. I extract the part of the XDR
file with only the attributes. read_attribute_list reads properly and
returns the Dict.
Feel free to ask questions about other pieces of the code or implementation
details. Any help with this would be appreciated!