Package: file
Version: 4.15-2
Followup-For: Bug #345928

Here is the patch.
It recognises the three-byte signature (0xEF 0xBB 0xBF) at the start of a
file, which indicates UTF-8 encoding, even if the rest of the content is
not valid UTF-8.

--- file-4.15/src/ascmagic.c    2005-06-30 18:33:01.000000000 +0200
+++ file-4.15.modif_jmg/src/ascmagic.c  2006-01-11 17:48:39.000000000 +0100
@@ -59,6 +59,7 @@
                  || (x) == 0x85 || (x) == '\f')

 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_utf8_header(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen);
 private int looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_unicode(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
@@ -121,6 +122,10 @@
                code = "UTF-8 Unicode";
                code_mime = "utf-8";
                type = "text";
+       } else if (looks_utf8_header(buf, nbytes, ubuf, &ulen)) {
+               code = "UTF-8 broken";
+               code_mime = "utf-8 ???";
+               type = "text";
        } else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen)) != 0) {
                if (i == 1)
                        code = "Little-endian UTF-16 Unicode";
@@ -512,6 +517,27 @@
 }

 private int
+looks_utf8_header(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+       unsigned char  utf8_header []= {0xef, 0xbb, 0xbf  };
+       int got = 1;
+       int i;
+       *ulen = 0;
+       if (nbytes <3)
+       {
+               return 0;
+       }
+       for (i = 0; i < 3; i++) {
+               if ( buf[i] != utf8_header [i] )
+               {
+                       got = 0;
+               }
+               //printf ("DEBUG:   '%d' , '%d' : [%d] -> %d\n", buf[i], utf8_header[i] , i, got);
+       }
+       return got;
+}
+
+private int
 looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
 {
        int i, n;



-- System Information:
Debian Release: testing/unstable
  APT prefers testing
  APT policy: (500, 'testing'), (500, 'stable')
Architecture: i386 (i686)
Shell:  /bin/sh linked to /bin/bash
Kernel: Linux 2.6.12-1-686
Locale: [EMAIL PROTECTED], [EMAIL PROTECTED] (charmap=ISO-8859-15)

Versions of packages file depends on:
ii  libc6                         2.3.5-8    GNU C Library: Shared libraries an
ii  libmagic1                     4.15-2     File type determination library us
ii  zlib1g                        1:1.2.3-9  compression library - runtime

file recommends no packages.

-- no debconf information


-- 
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]

Reply via email to