I have a Perl script (see below) for updating the mime.types file with the
latest registered IANA media types.  I would like to add it to our version
control, but I am unsure whether to place it in

  httpd/trunk/support/

or in

  httpd/docs-build/trunk/

I guess it depends on whether we want to distribute it as part of the product
or just use it ourselves as an occasional tool.  It is generally useful, though
not intended to be bulletproof.

....Roy

==========================

#!/usr/bin/perl
#
# update_mime_types.pl: Read an existing Apache mime.types file and
# merge its entries with any new types discovered within an
# IANA media-types.xml file (see below for obtaining it).
#
# All existing mime.types entries are preserved as is (aside from sorting).
# Any new registered types are merged as a commented-out entry without
# an assigned extension, and then the entire file is printed to stdout.
#
# Typical use would be something like:
# 
#  wget -N http://www.iana.org/assignments/media-types/media-types.xml
#  ./update_mime_types.pl > new.types
#  diff -u mime.types new.types               ; check the differences
#  rm mime.types && mv new.types mime.types   ; only if diffs are good
#
# Note that we assume all files are in the current working directory
# and efficiency is not an issue.
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use strict;
use warnings;

# Input files; both are looked up in the current working directory.
my $mity = 'mime.types';
my $medy = 'media-types.xml';

die "no $mity here\n" unless (-e $mity);
die "no $medy here\n" unless (-e $medy);

my $in_head = 1;     # true until the "# =========" header rule has been read
my @header  = ();    # header lines, replayed verbatim on output
my %mtype   = ();    # "major/minor" type name => complete output line

# Read through the Apache httpd mime.types file to create a table
# keyed on the full type name (major/minor).  We save the entire input
# line as the hash value so that existing configs won't change when output.
# We assume the type names are already lowercased tokens.
#
open(my $mime_fh, '<', $mity) or die "cannot open $mity: $!";

while (my $line = <$mime_fh>) {
    # A final line without a newline would otherwise be glued to whatever
    # entry follows it once the table is sorted and printed.
    $line .= "\n" unless $line =~ /\n\z/;

    if ($in_head) {
        # Everything up to and including the "# =========" rule is header.
        push @header, $line;
        $in_head = 0 if $line =~ /^# =========/;
        next;
    }

    # Accept both active entries and commented-out ("# type/subtype") ones.
    if ($line =~ /^(?:# )?([a-z_\+\-\.]+\/\S+)/) {
        $mtype{$1} = $line;
    }
    else {
        warn "Skipping: ", $line;
    }
}
close $mime_fh;

# Without the delimiter every line was treated as header, so nothing was
# parsed; say so instead of silently emitting a file with unsorted entries.
warn "No '# =========' header delimiter found in $mity; "
   . "no existing entries were parsed\n" if $in_head;

# Read through the IANA media types registry, in XML form, and extract
# whatever looks to be a registered type based on the element structure.
# Yes, this is horribly fragile, but the format isn't expected to change.
#
open(my $iana_fh, '<', $medy) or die "cannot open $medy: $!";

# 'examples' doubles as a sentinel: <name> elements are ignored until a
# <registry id="..."> with some other id has been seen, and also while
# inside a registry whose id is literally 'examples'.
my $major = 'examples';

while (my $line = <$iana_fh>) {
    # Stop at the <people> element; nothing after it is examined.
    last if $line =~ /^\s*<people>/;
    # Skip any line flagged as obsolete or deprecated.
    next if $line =~ /(?:OBSOLETE|DEPRECATE)/;

    if ($line =~ /^\s*<registry id="([a-z_\+\-\.]+)"/) {
        $major = $1;
        next;
    }
    next if $major eq 'examples';

    if ($line =~ /^\s*<name>([^<]+)<\/name>/) {
        my $thistype = lc "$major/$1";
        # Only add types not already present, as commented-out entries
        # without an extension; existing lines are never overwritten.
        $mtype{$thistype} = "# $thistype\n" unless exists $mtype{$thistype};
    }
}
close $iana_fh;

# Finally, output a replacement for Apache httpd's mime.types file:
# the original header followed by all entries sorted by type name.
#
print @header;

foreach my $key (sort keys %mtype) {
    print $mtype{$key};
}

exit 0;

Reply via email to