#!/usr/bin/perl
#
# -- Convert a BibTeX file into SQL INSERT statements for the table
# -- "literatur".
# --
# -- Usage:   perl <this script> [...] FILE.bib
# --
# -- The LAST command-line argument names the BibTeX input file.  One INSERT
# -- statement per BibTeX entry is written to "insert2mysql.txt" in the
# -- current directory (the file is overwritten).  Lines starting with '%'
# -- are skipped.  The value of the environment variable USER is stored in
# -- the "owner" column of every row.
# --
# -- How it works: physical lines are appended to a buffer until the number
# -- of '{' equals the number of '}' (and is non-zero); the buffer then holds
# -- one complete entry.  While reading, every comma that terminates a field
# -- is replaced by the marker LASTKOMMA so that commas inside field values
# -- do not split a field.

use strict;
use warnings;

my $OUTFILE   = "insert2mysql.txt";
my $STARTPART = "INSERT INTO literatur";

# -- Order of the values in the generated VALUES (...) list.  The BibTeX
# -- field "key" is emitted at the position of the table column "mykey",
# -- and the BibTeX field "language" is stored under "lang".
my @COLUMNS = qw(
    cite author editor title booktitle chapter journal year month
    institution organization school volume number series edition pages
    publisher address howpublished key crossref note annote type owner lang
);

# -- mark_line($line)
# -- Normalise one physical input line and replace a field-terminating
# -- comma at its end by the marker LASTKOMMA.  Returns the processed line
# -- without its line ending.
sub mark_line {
    my ($line) = @_;

    # -- Strip the line ending only.  (chop would also eat the last real
    # -- character of a final line that has no newline.)  A CR left over
    # -- from CRLF files would keep the ',$' patterns below from matching.
    $line =~ s/\r?\n\z//;
    $line =~ s/\r\z//;

    # -- Remove all tabs and the blanks at the end of the line
    $line =~ s/\t//g;
    $line =~ s/[ ]+$//;

    # -- A comma directly after a closing brace ends a field
    $line =~ s/\},$/\}LASTKOMMA/;

    my $braces = ( $line =~ tr/{// );
    my $equals = ( $line =~ tr/=// );

    if ( $braces == 0 and $equals != 0 ) {
        # -- "key = value" without braces: the trailing comma ends the field
        $line =~ s/,$/LASTKOMMA/;
    }
    elsif ( $line =~ /^@/ ) {
        # -- "@type{citekey,": the comma ends the cite key
        $line =~ s/,$/LASTKOMMA/;
    }
    elsif ( $line =~ /^.+@/ ) {
        # -- Something precedes an '@'; if it is only blanks this is an
        # -- indented entry header.
        $line =~ s/^[ ]+//;
        if ( $line =~ /^@/ ) {
            $line =~ s/,$/LASTKOMMA/;
        }
    }

    return $line;
}

# -- clean_value($raw)
# -- Turn the raw right-hand side of a BibTeX field into the text that goes
# -- between the single quotes of the SQL literal: the outer "..." or {...}
# -- delimiters are removed, backslashes and single quotes are doubled, and
# -- umlauts are rewritten as TeX accents.
sub clean_value {
    my ($raw) = @_;
    my $val;

    if ( $raw =~ /^"(.+?)"$/ ) {
        $val = $1;
    }
    elsif ( $raw =~ /^\{+(.+?)\}+$/ ) {
        # -- The pattern strips ALL leading '{' and ALL trailing '}', so an
        # -- inner group at either edge may have lost its brace; count the
        # -- braces and put one back if they do not match.
        # -- (The counts must cover braces only; "tr/^\{//" would also count
        # -- literal '^' characters because tr has no '^' negation.)
        $val = $1;
        my $open  = ( $val =~ tr/{// );
        my $close = ( $val =~ tr/}// );
        if ( $open > $close ) {
            $val = "$val\}";
        }
        elsif ( $open < $close ) {
            $val = "\{$val";
        }
    }
    else {
        $val = $raw;
    }

    # -- Escape for the single-quoted SQL string literal
    $val =~ s/\\/\\\\/g;
    $val =~ s/'/''/g;

    # -- Drop braces that protect a single character: "a{B}c" -> "aBc"
    $val =~ s/(.+?)\{(.)\}(.+?)/$1$2$3/g;
    $val =~ s/^\{(.)\}(.+?)/$1$2/;

    # -- {\"a} -> \"a (the backslash has already been doubled above)
    $val =~ s/\{\\\\"(.)\}/\\\\"$1/g;

    # -- Put a backslash in front of '&' unless the value already contains
    # -- an escaped one.
    # -- NOTE(review): only ONE backslash is inserted here while the umlaut
    # -- rules below insert two; kept as in the original - confirm intent.
    $val =~ s/&/\\&/g unless $val =~ /\\&/;

    # -- german.sty shorthand "a "o "u ... -> \"a \"o \"u ..., unless the
    # -- value already uses the backslash form
    $val =~ s/"([aAoOuU])/\\\\"$1/g unless $val =~ /\\"[aAoOuU]/;

    # -- Literal umlauts.  These are matched byte-wise (no "use utf8", no
    # -- decoding layer on the input handle), so the input file has to use
    # -- the same encoding as this script.
    $val =~ s/ä/\\\\"a/g;
    $val =~ s/Ä/\\\\"A/g;
    $val =~ s/ö/\\\\"o/g;
    $val =~ s/Ö/\\\\"O/g;
    $val =~ s/ü/\\\\"u/g;
    $val =~ s/Ü/\\\\"U/g;
    $val =~ s/ß/\{\\\\ss\}/g;

    return $val;
}

# -- build_insert($record, $owner)
# -- $record is one complete entry of the form "@type{body}" in which the
# -- fields of body are separated by LASTKOMMA.  Returns the INSERT statement
# -- (including the trailing newline), or nothing if $record does not have
# -- that form.
sub build_insert {
    my ( $record, $owner ) = @_;

    # -- Leading '@', the entry type, an opening '{' and a closing '}' at
    # -- the very end.  The match result must be checked: on failure $1/$2
    # -- would still hold the captures of an earlier entry.
    my ( $type, $body ) = $record =~ /^@(.+?)\{(.+?)\}$/
        or return;

    my %row = map { $_ => 'NULL' } @COLUMNS;
    $row{type}  = "'" . lc($type) . "'";
    $row{owner} = "'$owner'";

    # -- The first element of the body is the bare cite key; give it a
    # -- field name so that it is handled like every other field.
    foreach my $field ( split( /LASTKOMMA/, "cite = " . $body ) ) {

        # -- Limit 2: a value may itself contain '=' (e.g. in URLs)
        my ( $name, $raw ) = split( /=/, $field, 2 );
        next unless defined $name;
        $raw = '' unless defined $raw;

        $name =~ s/ //g;
        $name = lc($name);
        $raw =~ s/^ //;
        $raw =~ s/ $//;

        my $val    = clean_value($raw);
        my $column = $name eq "language" ? "lang" : $name;

        # -- Fields without a matching column are not part of the output
        next unless exists $row{$column};

        if ( $column eq "year" and $val =~ /\A\d+\z/ ) {
            # -- Numeric years are written without quotes
            $row{$column} = $val;
        }
        else {
            # -- Everything else, including a year such as "in press",
            # -- has to be quoted to keep the statement valid.
            $row{$column} = "'$val'";
        }
    }

    return "$STARTPART VALUES (" . join( ",", @row{@COLUMNS} ) . "); \n";
}

# -- Main program -----------------------------------------------------------

die "SOMETHING FISHY: no input file given (usage: $0 FILE.bib)\n"
    unless @ARGV;

my $infile = $ARGV[-1];
my $owner  = defined $ENV{'USER'} ? $ENV{'USER'} : '';

open( my $in, '<', $infile )
    or die "SOMETHING FISHY: cannot read '$infile': $!\n";
open( my $out, '>', $OUTFILE )
    or die "SOMETHING FISHY: cannot write '$OUTFILE': $!\n";

# -- Lines collected so far for the entry currently being read
my $record = "";

while ( my $line = <$in> ) {

    # -- Skip comment lines
    next if substr( $line, 0, 1 ) eq "%";

    # -- Append the line to the lines already collected
    $record .= mark_line($line);

    # -- Substitute successive blanks by only one in the whole string
    $record =~ s/[ ]+/ /g;

    # -- A field separator directly in front of a closing brace is not needed
    $record =~ s/LASTKOMMA\}/\}/g;

    # -- Same (non-zero) number of opening and closing braces: one complete
    # -- set of data has been read
    my $open  = ( $record =~ tr/{// );
    my $close = ( $record =~ tr/}// );
    next unless $open == $close and $open != 0;

    my $statement = build_insert( $record, $owner );
    if ( defined $statement ) {
        print {$out} $statement;
    }
    else {
        warn "$infile: skipping unparsable entry ending at line $.: $record\n";
    }
    $record = "";
}

warn "$infile: incomplete entry at end of file ignored: $record\n"
    if $record =~ /\S/;

close($in);
close($out) or die "SOMETHING FISHY: cannot close '$OUTFILE': $!\n";