On 3/5/2012 7:14 PM, Jona Christopher Sahnwaldt wrote:
> Dear all,
>
> I just checked a few specs to figure out what would be the best policy
> for DBpedia regarding URI encoding.
>
> In summary, I think DBpedia should encode as few characters as
> possible, e.g. use '&', not '%26'.
I came up with the following encoding function for the path
component of a URI based on a close reading of RFC 3987. Any disagreements?
/**
 * Percent-encodes a string so that every character of the result is legal in
 * a single IRI path segment.
 *
 * <p>A code point is copied through unchanged when it matches the
 * {@code ipchar} production of RFC 3987, section 2.2 (excluding the
 * {@code pct-encoded} alternative, so a literal {@code '%'} is itself
 * escaped to {@code %25}). Every other code point is replaced by the
 * percent-encoded form of its UTF-8 bytes, always as {@code '%'} followed by
 * exactly two upper-case hex digits.
 *
 * <p>Note that {@code '/'} is not an {@code ipchar} and is therefore escaped;
 * pass one path segment at a time, not a whole path.
 *
 * <p>The class holds no per-call state; all working data lives in locals of
 * {@link #escape(String)}.
 */
public static class IRIEscaper {

    /** Upper-case hex digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Escapes {@code key} for use as an IRI path segment.
     *
     * @param key the raw text to escape; must not be {@code null}
     * @return {@code key} with every code point that is not an allowed
     *         {@code ipchar} replaced by its percent-encoded UTF-8 bytes
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public String escape(String key) {
        final int length = key.length();
        final StringBuilder out = new StringBuilder(length);
        // Walk by code point, not by char, so surrogate pairs are judged
        // (and encoded) as the single supplementary character they denote.
        for (int offset = 0; offset < length; ) {
            final int codepoint = key.codePointAt(offset);
            if (acceptChar(codepoint)) {
                out.appendCodePoint(codepoint);
            } else {
                percentEncode(out, codepoint);
            }
            offset += Character.charCount(codepoint);
        }
        return out.toString();
    }

    /**
     * Appends the percent-encoded UTF-8 bytes of {@code cp} to {@code out}.
     *
     * <p>An unpaired surrogate has no UTF-8 form; {@code String.getBytes}
     * substitutes the charset's replacement byte for it.
     */
    private static void percentEncode(StringBuilder out, int cp) {
        final byte[] utf8 = new String(Character.toChars(cp))
                .getBytes(java.nio.charset.StandardCharsets.UTF_8);
        for (byte b : utf8) {
            // Emit both nibbles explicitly: Integer.toHexString would drop
            // the leading zero and turn 0x0A into the invalid escape "%A".
            out.append('%')
               .append(HEX_DIGITS[(b >> 4) & 0x0F])
               .append(HEX_DIGITS[b & 0x0F]);
        }
    }

    /**
     * Tells whether {@code cp} may appear literally in an IRI path segment.
     *
     * <p>Implements RFC 3987, section 2.2:
     * <pre>
     *   ipchar      = iunreserved / pct-encoded / sub-delims / ":" / "@"
     *   iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
     * </pre>
     * minus {@code pct-encoded}, which cannot be decided one character at a
     * time.
     */
    private static boolean acceptChar(int cp) {
        if (cp < 0xA0) {
            return isAsciiIpchar(cp);
        }
        // ucschar, Basic Multilingual Plane. The gaps exclude surrogates,
        // the private-use area and the non-character blocks.
        if (cp <= 0xD7FF) {
            return true;
        }
        if (cp >= 0xF900 && cp <= 0xFDCF) {
            return true;
        }
        if (cp >= 0xFDF0 && cp <= 0xFFEF) {
            return true;
        }
        // ucschar, planes 1-13: everything except the last two code points
        // (xFFFE and xFFFF) of each plane.
        if (cp >= 0x10000 && cp <= 0xDFFFD) {
            return (cp & 0xFFFF) <= 0xFFFD;
        }
        // ucschar, plane 14: starts at E1000, not E0000.
        return cp >= 0xE1000 && cp <= 0xEFFFD;
    }

    /** ALPHA / DIGIT / unreserved marks / sub-delims / ":" / "@". */
    private static boolean isAsciiIpchar(int cp) {
        if ((cp >= 'a' && cp <= 'z') || (cp >= 'A' && cp <= 'Z') || (cp >= '0' && cp <= '9')) {
            return true;
        }
        switch (cp) {
            // remaining "unreserved" characters
            case '-': case '.': case '_': case '~':
            // sub-delims
            case '!': case '$': case '&': case '\'': case '(': case ')':
            case '*': case '+': case ',': case ';': case '=':
            // extra characters ipchar allows in a path segment
            case ':': case '@':
                return true;
            default:
                return false;
        }
    }
}
------------------------------------------------------------------------------
Keep Your Developer Skills Current with LearnDevNow!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-d2d
_______________________________________________
Dbpedia-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/dbpedia-discussion