Sumit Gupta created KNOX-767:
--------------------------------

             Summary: Knox transforms XML files written to WebHDFS
                 Key: KNOX-767
                 URL: https://issues.apache.org/jira/browse/KNOX-767
             Project: Apache Knox
          Issue Type: Bug
            Reporter: Sumit Gupta
             Fix For: 0.11.0


When you write an XML file to WebHDFS through Knox with the Content-Type header 
set to text/xml or application/xml, Knox rewrites the body: empty tags like 
<xyz/> are written as <xyz></xyz>, CDATA sections are unwrapped and their 
content is entity-escaped, and an XML declaration is prepended. This does not 
happen when the file is written directly to WebHDFS. For example:

{code}

[root@hdp250 ~]# cat xxx
<document>
   <empty/>
   <![CDATA[<xyz>wibble</xyz>]]>
</document>

[root@hdp250 ~]# curl -u guest:guest-password -i -k -X PUT "https://hdp250.local:8443/gateway/default/webhdfs/v1/tmp/xxx?op=CREATE&overwrite=true"
HTTP/1.1 307 Temporary Redirect
Date: Thu, 27 Oct 2016 19:25:35 GMT
Set-Cookie: JSESSIONID=bt3timb9jl7k546fcntrj8s;Path=/gateway/default;Secure;HttpOnly
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Set-Cookie: rememberMe=deleteMe; Path=/gateway/default; Max-Age=0; Expires=Wed, 26-Oct-2016 19:25:35 GMT
Cache-Control: no-cache
Expires: Thu, 27 Oct 2016 19:25:35 GMT
Date: Thu, 27 Oct 2016 19:25:35 GMT
Pragma: no-cache
Expires: Thu, 27 Oct 2016 19:25:35 GMT
Date: Thu, 27 Oct 2016 19:25:35 GMT
Pragma: no-cache
Content-Type: application/octet-stream
X-FRAME-OPTIONS: SAMEORIGIN
Location: https://hdp250.local:8443/gateway/default/webhdfs/data/v1/webhdfs/v1/tmp/xxx?_=AAAACAAAABAAAAEQ-XO5sAM86ubmjRdUYXJEZkpM4Vdv3vmIprBetQwfaKaZNN4uc9O1IN8jujDD9GpPPCDJCKxebul_GlCFxDIZzbkhZ1tnhY5rZ6V12SVJgLo5DxMxC8zECeaM4M8OFLqHxamNnvduuUkD5y23RJczzHHJ9SyYuG6yiCpDJKB_5MffZIWFaEEcYM7jOkjStZHU_7cjIg_vRJL2nFCVTWKf1FPkB00QCbXHN-Ua6MfEG8p2aoQB70tfVHnmhhnBWx2PZARJ-kHp42rrpA1yrI86v3Q-OGI4Ya3pnPRWhPj0wbdDr_p_FDinsw2KRu1_aRSIXXznmJ--aX6TflbBGZvDImkw4x0QM48UGFpOChaLtHk73rlMMUbbbAwOew0gJ2-69PuXiL4QB48
Server: Jetty(6.1.26.hwx)
Content-Length: 0

[root@hdp250 ~]# curl -u guest:guest-password -i -k -X PUT -T xxx -H 'Content-Type: text/xml' "https://hdp250.local:8443/gateway/default/webhdfs/data/v1/webhdfs/v1/tmp/xxx?_=AAAACAAAABAAAAEQ-XO5sAM86ubmjRdUYXJEZkpM4Vdv3vmIprBetQwfaKaZNN4uc9O1IN8jujDD9GpPPCDJCKxebul_GlCFxDIZzbkhZ1tnhY5rZ6V12SVJgLo5DxMxC8zECeaM4M8OFLqHxamNnvduuUkD5y23RJczzHHJ9SyYuG6yiCpDJKB_5MffZIWFaEEcYM7jOkjStZHU_7cjIg_vRJL2nFCVTWKf1FPkB00QCbXHN-Ua6MfEG8p2aoQB70tfVHnmhhnBWx2PZARJ-kHp42rrpA1yrI86v3Q-OGI4Ya3pnPRWhPj0wbdDr_p_FDinsw2KRu1_aRSIXXznmJ--aX6TflbBGZvDImkw4x0QM48UGFpOChaLtHk73rlMMUbbbAwOew0gJ2-69PuXiL4QB48"
HTTP/1.1 100 Continue

HTTP/1.1 201 Created
Date: Thu, 27 Oct 2016 19:25:54 GMT
Set-Cookie: JSESSIONID=3o27jby7c2a6mdpxducddqac;Path=/gateway/default;Secure;HttpOnly
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Set-Cookie: rememberMe=deleteMe; Path=/gateway/default; Max-Age=0; Expires=Wed, 26-Oct-2016 19:25:54 GMT
Location: https://hdp250.local:8443/gateway/default/webhdfs/v1/tmp/xxx
Connection: close
Server: Jetty(9.2.15.v20160210)

[root@hdp250 ~]# hdfs dfs -cat /tmp/xxx
<?xml version="1.0" standalone="no"?><document>
   <empty></empty>
   &lt;xyz&gt;wibble&lt;/xyz&gt;
</document>

{code}




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to