On 3/5/2012 7:14 PM, Jona Christopher Sahnwaldt wrote:
> Dear all,
>
> I just checked a few specs to figure out what would be the best policy
> for DBpedia regarding URI encoding.
>
> In summary, I think DBpedia should encode as few characters as
> possible, e.g. use '&', not '%26'.

     I came up with the following encoding function for the path 
component of a URI based on a close reading of RFC 3987.  Any disagreements?

     /**
      * Escapes a string for use inside a single path segment of an IRI.
      *
      * <p>Each code point that matches the {@code ipchar} production of RFC 3987
      * (minus the {@code pct-encoded} alternative) is copied through unchanged;
      * every other code point is replaced by the percent-encoded form of its
      * UTF-8 bytes, using exactly two upper-case hex digits per byte as required
      * by RFC 3986 section 2.1.
      *
      * <p>Note that {@code '%'} itself is always encoded (as {@code %25}), so
      * the operation is not idempotent: do not feed already-escaped text back in.
      */
     public static class IRIEscaper {
         /** Upper-case hex digits, indexed by nibble value. */
         private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

         /**
          * Returns {@code key} with every code point outside {@code ipchar}
          * percent-encoded.
          *
          * @param key the raw text to escape; must not be {@code null}
          * @return the escaped text (empty when {@code key} is empty)
          * @throws NullPointerException if {@code key} is {@code null}
          */
         public String escape(String key) {
             final int length = key.length();
             // Local buffer: keeps the method reentrant and free of shared state.
             final StringBuilder out = new StringBuilder(length);
             // Walk by code point, not by char, so surrogate pairs stay together.
             for (int offset = 0; offset < length; ) {
                 final int codepoint = key.codePointAt(offset);
                 if (acceptChar(codepoint)) {
                     out.appendCodePoint(codepoint);
                 } else {
                     percentEncode(out, codepoint);
                 }
                 offset += Character.charCount(codepoint);
             }
             return out.toString();
         }

         /**
          * Appends the percent-encoded UTF-8 bytes of {@code cp} to {@code out}.
          * Every byte is written as {@code '%'} followed by exactly two hex
          * digits, so bytes below 0x10 keep their leading zero (e.g. {@code %0A}).
          */
         private static void percentEncode(StringBuilder out, int cp) {
             final byte[] bytes;
             try {
                 bytes = new String(Character.toChars(cp)).getBytes("UTF-8");
             } catch (UnsupportedEncodingException ex) {
                 // UTF-8 is a charset every Java platform must provide.
                 throw new RuntimeException(ex);
             }
             for (byte b : bytes) {
                 out.append('%');
                 out.append(HEX_DIGITS[(b >> 4) & 0x0F]);
                 out.append(HEX_DIGITS[b & 0x0F]);
             }
         }

         /**
          * Tells whether {@code cp} may appear literally in an IRI path segment.
          *
          * <p>Implements {@code ipchar} from RFC 3987 section 2.2 without the
          * {@code pct-encoded} alternative:
          * {@code iunreserved / sub-delims / ":" / "@"}, where
          * {@code iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar}.
          */
         private static boolean acceptChar(int cp) {
             if (cp < 0xA0) {
                 // Below the first ucschar range only the ASCII set is allowed.
                 return isAsciiPathChar(cp);
             }
             if (cp <= 0xD7FF) {
                 return true;
             }
             if (cp >= 0xF900 && cp <= 0xFDCF) {
                 return true;
             }
             if (cp >= 0xFDF0 && cp <= 0xFFEF) {
                 return true;
             }
             if (cp >= 0x10000 && cp <= 0xDFFFD) {
                 // Planes 1-13: everything except the two trailing
                 // noncharacters (xFFFE, xFFFF) of each plane.
                 return (cp & 0xFFFF) <= 0xFFFD;
             }
             // Plane 14: the grammar starts this range at E1000, not E0000.
             return cp >= 0xE1000 && cp <= 0xEFFFD;
         }

         /**
          * Tells whether {@code cp} is an ASCII character allowed by
          * {@code ipchar}: ALPHA, DIGIT, the unreserved marks, the sub-delims,
          * {@code ':'} or {@code '@'}.
          */
         private static boolean isAsciiPathChar(int cp) {
             if ((cp >= 'a' && cp <= 'z') || (cp >= 'A' && cp <= 'Z')
                     || (cp >= '0' && cp <= '9')) {
                 return true;
             }
             switch (cp) {
                 // unreserved marks
                 case '-': case '.': case '_': case '~':
                 // sub-delims
                 case '!': case '$': case '&': case '\'': case '(': case ')':
                 case '*': case '+': case ',': case ';': case '=':
                 // extra characters permitted by ipchar
                 case ':': case '@':
                     return true;
                 default:
                     return false;
             }
         }
     }



------------------------------------------------------------------------------
Keep Your Developer Skills Current with LearnDevNow!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-d2d
_______________________________________________
Dbpedia-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/dbpedia-discussion

Reply via email to