Here is an implementation of FindMimeFromData, based off of MSDN
http://msdn.microsoft.com/workshop/networking/moniker/overview/appendix_a.asp

There is a lot of debate on the 'net regarding this "feature" in IE, most of it
suggesting that it is a bad idea.

One possible change that would be more "standards compliant" rather than
"Windows-like" would be to return the proposed MIME type unchanged if one is
given, and otherwise go through the detection methods. Let me know if that
would be a preferred solution.

Changelog
        Implement FindMimeFromData
Index: dlls/urlmon/Makefile.in
===================================================================
RCS file: /home/wine/wine/dlls/urlmon/Makefile.in,v
retrieving revision 1.18
diff -u -r1.18 Makefile.in
--- dlls/urlmon/Makefile.in	27 Jan 2004 00:11:16 -0000	1.18
+++ dlls/urlmon/Makefile.in	18 Mar 2004 16:32:31 -0000
@@ -3,10 +3,11 @@
 SRCDIR    = @srcdir@
 VPATH     = @srcdir@
 MODULE    = urlmon.dll
-IMPORTS   = cabinet ole32 wininet user32 kernel32 ntdll
+IMPORTS   = shlwapi advapi32 cabinet ole32 wininet user32 kernel32 ntdll
 EXTRALIBS = -luuid
 
 C_SRCS = \
+	mime.c \
 	umon.c \
 	urlmon_main.c
 
Index: dlls/urlmon/umon.c
===================================================================
RCS file: /home/wine/wine/dlls/urlmon/umon.c,v
retrieving revision 1.26
diff -u -r1.26 umon.c
--- dlls/urlmon/umon.c	23 Jan 2004 01:51:34 -0000	1.26
+++ dlls/urlmon/umon.c	18 Mar 2004 16:32:32 -0000
@@ -919,20 +919,6 @@
 }
 
 /***********************************************************************
- *           FindMimeFromData (URLMON.@)
- *
- * Determines the Multipurpose Internet Mail Extensions (MIME) type from the data provided.
- *
- */
-HRESULT WINAPI FindMimeFromData(LPBC pBC, LPCWSTR pwzUrl, LPVOID pBuffer,
-   DWORD cbSize, LPCWSTR pwzMimeProposed, DWORD dwMimeFlags,
-   LPWSTR* ppwzMimeOut, DWORD dwReserved)
-{
-  FIXME("stub\n");
-  return E_OUTOFMEMORY;
-}
-
-/***********************************************************************
  *           IsAsyncMoniker (URLMON.@)
  */
 HRESULT WINAPI IsAsyncMoniker(IMoniker *pmk)
--- /dev/null	1969-12-31 19:00:00.000000000 -0500
+++ dlls/urlmon/mime.c	2004-03-18 11:32:50.567275960 -0500
@@ -0,0 +1,291 @@
+/*
+ * UrlMon - MIME detection
+ *
+ * Copyright 2004 Kevin Koltzau
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <stdarg.h>
+
+#include "windef.h"
+#include "winbase.h"
+#include "winreg.h"
+#include "winuser.h"
+#include "shlwapi.h"
+
+#include "wine/debug.h"
+
+WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
+
+typedef BOOL (*MimeCheckFormat)(LPVOID pBuffer, DWORD cbSize); /* format test: TRUE if the cbSize bytes at pBuffer match */
+
+typedef struct _MIME_MASK { /* one byte pattern of a type's signature; tables end with data == NULL */
+    const char *data;  /* bytes the buffer is compared against */
+    int len;           /* number of bytes compared */
+    int start;         /* offset into the buffer where the comparison starts */
+    BOOL or;           /* TRUE: the following entry is OR'ed with this one instead of AND'ed */
+} MIME_MASK;
+
+typedef struct _KNOWN_MIME_TYPES { /* one MIME type FindMimeFromData can recognise */
+    const WCHAR mime[32];          /* MIME type name */
+    BOOL binary;                   /* TRUE if the type holds binary data, FALSE if text */
+    const MIME_MASK *mask;         /* signature table, or NULL if there is none */
+    MimeCheckFormat checkFormat;   /* test used by URLMON_ScanData when mask is NULL; may be NULL */
+} KNOWN_MIME_TYPES;
+
+/* Heuristic check for text/html: TRUE if the data contains one of a few HTML
+ * marker strings (compared case-insensitively), read as UTF-16 or ANSI text.
+ * pBuffer/cbSize describe the raw data, which need not be null terminated.
+ * Returns FALSE on no match or if the scratch copy cannot be allocated. */
+BOOL URLMON_ScanHTML(LPVOID pBuffer, DWORD cbSize)
+{
+    static const WCHAR szHtmlW[] = {'h','t','m','l','\0'};
+    static const WCHAR szDoctypeW[] = {'<','!','d','o','c','t','y','p','e',' ','h','t','m','l','\0'};
+    static const WCHAR szBodyW[] = {'b','o','d','y','\0'};
+    static const WCHAR szHeadW[] = {'h','e','a','d','\0'};
+    static const WCHAR szTitleW[] = {'t','i','t','l','e','\0'};
+    static const char szHtmlA[] = "html";
+    static const char szDoctypeA[] = "<!doctype html";
+    static const char szBodyA[] = "body";
+    static const char szHeadA[] = "head";
+    static const char szTitleA[] = "title";
+    BOOL ishtml = FALSE;
+    char *buffer;
+
+    /* Don't fail just because the buffer is odd: ignore the trailing byte */
+    if(cbSize%2) cbSize--;
+    /* StrStrI* need terminated strings: search a zero-terminated private copy */
+    buffer = HeapAlloc(GetProcessHeap(), 0, cbSize+2);
+    if(!buffer) return FALSE;
+    CopyMemory(buffer, pBuffer, cbSize);
+    buffer[cbSize] = 0;
+    buffer[cbSize+1] = 0;
+    if(IsTextUnicode(buffer, cbSize, NULL)) {
+        const WCHAR *data = (const WCHAR*)buffer;
+        if(StrStrIW(data, szHtmlW) ||
+           StrStrIW(data, szDoctypeW) ||
+           StrStrIW(data, szBodyW) ||
+           StrStrIW(data, szHeadW) ||
+           StrStrIW(data, szTitleW))
+            ishtml = TRUE;
+    }
+    else if(StrStrIA(buffer, szHtmlA) ||
+            StrStrIA(buffer, szDoctypeA) ||
+            StrStrIA(buffer, szBodyA) ||
+            StrStrIA(buffer, szHeadA) ||
+            StrStrIA(buffer, szTitleA))
+        ishtml = TRUE;
+    HeapFree(GetProcessHeap(), 0, buffer);
+    return ishtml;
+}
+
+/* Test the data against one known type. A mask table lists byte patterns;
+ * an entry with 'or' set is an alternative to the entry that follows it, and
+ * every such group of alternatives must match. A type without a mask table
+ * is tested with its checkFormat callback instead, if it has one.
+ * Returns TRUE if the data matches, FALSE otherwise (or if type is NULL). */
+BOOL URLMON_ScanData(const KNOWN_MIME_TYPES *type, LPVOID pBuffer, DWORD cbSize)
+{
+    const MIME_MASK *mask;
+    BOOL matched = FALSE; /* has an alternative of the current group matched? */
+    int i;
+
+    if(!type) return FALSE;
+    if(!type->mask) {
+        if(!type->checkFormat || !type->checkFormat(pBuffer, cbSize))
+            return FALSE;
+        TRACE("Found %s\n", debugstr_w(type->mime));
+        return TRUE;
+    }
+    mask = type->mask;
+    for(i=0; mask[i].data; i++) {
+        /* A pattern that does not fit inside the buffer cannot match; the
+           bounds are compared unsigned and without overflowing start+len */
+        if(!matched && mask[i].start >= 0 && mask[i].len >= 0 &&
+           (DWORD)mask[i].start <= cbSize && (DWORD)mask[i].len <= cbSize-(DWORD)mask[i].start)
+            matched = !memcmp(mask[i].data, ((char*)pBuffer)+mask[i].start, mask[i].len);
+        /* Keep collecting while another alternative follows this entry */
+        if(mask[i].or && mask[i+1].data)
+            continue;
+        if(!matched)
+            return FALSE;
+        matched = FALSE;
+    }
+    TRACE("Found %s\n", debugstr_w(type->mime));
+    return TRUE;
+}
+
+/* Guess whether the data is binary or text: TRUE when more than half of the
+ * bytes are neither in the range 32..127 nor one of TAB, LF or CR. */
+BOOL URLMON_IsBinary(LPVOID pBuffer, DWORD cbSize)
+{
+    const unsigned char *buff = pBuffer;
+    DWORD i, binarycount = 0; /* unsigned like cbSize so the compares are exact */
+    for(i=0; i<cbSize; i++)
+        if(buff[i] > 127 || (buff[i] < 32 && buff[i] != '\t' && buff[i] != '\n' && buff[i] != '\r'))
+            binarycount++;
+    if(binarycount > cbSize-binarycount) {
+        TRACE("Is binary\n");
+        return TRUE;
+    }
+    TRACE("Is text\n");
+    return FALSE;
+}
+
+/***********************************************************************
+ *           FindMimeFromData (URLMON.@)
+ *
+ * Determines the Multipurpose Internet Mail Extensions (MIME) type from the data provided.
+ *
+ * PARAMS
+ *  pwzUrl          [I] URL of the data; the text from its last '.' on is looked up in the registry. May be NULL
+ *  pBuffer, cbSize [I] Data to examine; may be NULL/0 when pwzUrl is given
+ *  pwzMimeProposed [I] Suggested MIME type; may be NULL
+ *  ppwzMimeOut     [O] Receives the MIME type in memory from CoTaskMemAlloc
+ *  pBC, dwMimeFlags and dwReserved are only traced.
+ *
+ * RETURNS
+ *  S_OK, E_INVALIDARG (neither URL nor data, or no ppwzMimeOut) or E_OUTOFMEMORY.
+ *
+ * NOTE
+ *  See http://msdn.microsoft.com/workshop/networking/moniker/overview/appendix_a.asp
+ */
+HRESULT WINAPI FindMimeFromData(LPBC pBC, LPCWSTR pwzUrl, LPVOID pBuffer,
+   DWORD cbSize, LPCWSTR pwzMimeProposed, DWORD dwMimeFlags,
+   LPWSTR* ppwzMimeOut, DWORD dwReserved)
+{
+    /* Magic numbers partly derived from file(1); static so the tables are built once */
+    static const MIME_MASK gifMask[] = {{"GIF", 3, 0},{NULL}};
+    static const MIME_MASK pngMask[] = {{"\x89PNG", 4, 0},{NULL}};
+    static const MIME_MASK jpegMask[] = {{"\xff\xd8", 2, 0},{NULL}};
+    static const MIME_MASK bmpMask[] = {{"BM", 2, 0},{NULL}};
+    static const MIME_MASK aviMask[] = {{"RIFF", 4, 0,FALSE},{"AVI", 3, 8},{NULL}};
+    static const MIME_MASK wavMask[] = {{"RIFF", 4, 0,FALSE},{"WAVE", 4, 8},{NULL}};
+    static const MIME_MASK tiffMask[] = {{"MM", 2, 0,TRUE},{"II", 2, 0},{NULL}};
+    static const MIME_MASK aiffMask[] = {{"AIFF", 4, 8,TRUE},{"AIFC", 4, 8,TRUE},{"8SVX", 4, 8},{NULL}};
+    static const MIME_MASK zipMask[] = {{"PK\003\004",4,0},{NULL}};
+    static const MIME_MASK gzipMask[] = {{"\037\213",2,0},{NULL}};
+    static const MIME_MASK pdfMask[] = {{"%PDF-",5,0},{NULL}};
+    static const MIME_MASK postscriptMask[] = {{"%!",2,0,TRUE},{"\004%!",3,0},{NULL}};
+    static const MIME_MASK exeMask[] = {{"MZ",2,0},{NULL}};
+    /* FIXME: Add more types, see URL in NOTE */
+    static const KNOWN_MIME_TYPES types[] = {
+        {{'i','m','a','g','e','/','g','i','f','\0'}, TRUE, gifMask},
+        {{'i','m','a','g','e','/','x','-','p','n','g','\0'}, TRUE, pngMask},
+        {{'i','m','a','g','e','/','j','p','e','g','\0'}, TRUE, jpegMask},
+        {{'i','m','a','g','e','/','b','m','p','\0'},TRUE,bmpMask},
+        {{'i','m','a','g','e','/','t','i','f','f','\0'},TRUE,tiffMask},
+        {{'v','i','d','e','o','/','a','v','i','\0'},TRUE,aviMask},
+        {{'a','u','d','i','o','/','w','a','v','\0'},TRUE,wavMask},
+        {{'a','u','d','i','o','/','x','-','a','i','f','f','\0'},TRUE,aiffMask},
+        {{'a','p','p','l','i','c','a','t','i','o','n','/','x','-','z','i','p','-','c','o','m','p','r','e','s','s','e','d','\0'},TRUE,zipMask},
+        {{'a','p','p','l','i','c','a','t','i','o','n','/','x','-','g','z','i','p','-','c','o','m','p','r','e','s','s','e','d','\0'},TRUE,gzipMask},
+        {{'a','p','p','l','i','c','a','t','i','o','n','/','p','d','f','\0'},TRUE,pdfMask},
+        {{'a','p','p','l','i','c','a','t','i','o','n','/','p','o','s','t','s','c','r','i','p','t','\0'},FALSE,postscriptMask},
+        {{'a','p','p','l','i','c','a','t','i','o','n','/','x','-','m','s','d','o','w','n','l','o','a','d','\0'},TRUE,exeMask},
+        {{'t','e','x','t','/','h','t','m','l','\0'}, FALSE, NULL, URLMON_ScanHTML},
+        /* ambiguous types */
+        {{'a','p','p','l','i','c','a','t','i','o','n','/','o','c','t','e','t','-','s','t','r','e','a','m','\0'}, TRUE, NULL},
+        {{'t','e','x','t','/','p','l','a','i','n','\0'}, FALSE, NULL},
+    };
+    static const WCHAR szBinaryMime[] = {'a','p','p','l','i','c','a','t','i','o','n','/','o','c','t','e','t','-','s','t','r','e','a','m','\0'};
+    static const WCHAR szTextMime[] = {'t','e','x','t','/','p','l','a','i','n','\0'};
+    static const WCHAR szContentType[] = {'C','o','n','t','e','n','t',' ','T','y','p','e','\0'};
+    WCHAR szTmpMime[256];
+    LPCWSTR mimeType;
+    int typeindex = -1;
+    unsigned int i;
+    HKEY hKey = NULL;
+    BOOL dataIsBinary = TRUE;
+
+    TRACE("(%p,%s,%p,%ld,%s,0x%lx,%p,0x%lx)\n", pBC, debugstr_w(pwzUrl), pBuffer, cbSize,
+          debugstr_w(pwzMimeProposed), dwMimeFlags, ppwzMimeOut, dwReserved);
+    if((!pwzUrl && (!pBuffer || !cbSize)) || !ppwzMimeOut)
+        return E_INVALIDARG;
+
+    /* Check if the proposed mime type is known */
+    if(pwzMimeProposed) {
+        for(i=0; i<sizeof(types)/sizeof(types[0]); i++) {
+            if(!lstrcmpiW(pwzMimeProposed, types[i].mime)) {
+                TRACE("Found known mime %s\n", debugstr_w(types[i].mime));
+                typeindex = i;
+                break;
+            }
+        }
+        /* If this is an unknown mime type, return it unchanged */
+        if(typeindex == -1) {
+            mimeType = pwzMimeProposed;
+            goto foundmime;
+        }
+    }
+    /* Try and find a match from the buffer contents */
+    if(pBuffer && cbSize) {
+        dataIsBinary = URLMON_IsBinary(pBuffer, cbSize);
+        /* First try and match the mime type passed, otherwise check all known types */
+        if(typeindex != -1 && URLMON_ScanData(&types[typeindex], pBuffer, cbSize)) {
+            mimeType = types[typeindex].mime;
+            goto foundmime;
+        }
+        for(i=0; i<sizeof(types)/sizeof(types[0]); i++) {
+            if(URLMON_ScanData(&types[i], pBuffer, cbSize)) {
+                mimeType = types[i].mime;
+                goto foundmime;
+            }
+        }
+        /* No signature matched: keep a known proposed type if it agrees with
+           the data about being binary or text */
+        if(typeindex != -1 && types[typeindex].binary == dataIsBinary) {
+            mimeType = types[typeindex].mime;
+            goto foundmime;
+        }
+    }
+    /* If we've had no luck so far, try and get the mime type from the registry */
+    if(pwzUrl) {
+        LPWSTR ext = StrRChrW(pwzUrl, NULL, '.');
+        /* Leave room for the terminator, which a stored string may lack */
+        DWORD dwType, dwSize = sizeof(szTmpMime)-sizeof(WCHAR);
+        if(ext && !RegOpenKeyExW(HKEY_CLASSES_ROOT, ext, 0, KEY_READ, &hKey) &&
+           !RegQueryValueExW(hKey, szContentType, NULL, &dwType, (LPBYTE)szTmpMime, &dwSize) &&
+           dwType == REG_SZ) {
+            szTmpMime[dwSize/sizeof(WCHAR)] = 0;
+            /* a value exists, now see if it's a known type and ignore it if it is */
+            typeindex = -1;
+            for(i=0; i<sizeof(types)/sizeof(types[0]); i++) {
+                if(!lstrcmpiW(szTmpMime, types[i].mime)) {
+                    typeindex = i;
+                    break;
+                }
+            }
+            /* If the type is not known, return it */
+            if(typeindex == -1) {
+                mimeType = szTmpMime;
+                goto foundmime;
+            }
+        }
+    }
+
+    /* Last resort, give either text/plain or application/octet-stream depending on whether the data is primarily binary or text */
+    if(dataIsBinary)
+        mimeType = szBinaryMime;
+    else
+        mimeType = szTextMime;
+foundmime:
+    if(hKey) RegCloseKey(hKey);
+    *ppwzMimeOut = CoTaskMemAlloc((lstrlenW(mimeType)+1)*sizeof(WCHAR));
+    if(!*ppwzMimeOut) return E_OUTOFMEMORY;
+    lstrcpyW(*ppwzMimeOut, mimeType);
+    return S_OK;
+}

Reply via email to