#!/usr/bin/perl

use strict;

# -- Working variables shared by the whole script
my ($count1, $count2, @entries);
my ($inelem, $infile, $inkeys, $key, $line, $nline, $outfile, $owner);
my (@set, $startpart, $tmp, $values);
my (%dbhash, $pgpart);

# -- The statements are always written to this file in the current directory
$outfile = "insert2mysql.txt";

# -- The BibTeX input file is the LAST command line argument
die "Usage: $0 <bibtex-file>\n" unless @ARGV;
$infile = $ARGV[$#ARGV];

# -- $nline collects the lines of the entry that is currently being read
$nline = "";
# -- The owner column is filled with the login name; an unset USER gives ''
$owner = defined $ENV{'USER'} ? $ENV{'USER'} : "";

# -- Three-argument open: the file name is never interpreted as a mode or a
# -- pipe, and leading/trailing blanks in the name are preserved
open(IFILE, '<', $infile)  or die "Cannot open '$infile' for reading: $!\n";
open(OFILE, '>', $outfile) or die "Cannot open '$outfile' for writing: $!\n";
# -- Every generated statement starts with this text
$startpart = 'INSERT INTO literatur';

# -- %dbhash holds the SQL value of every column of the current entry.
# -- All columns start out as the unquoted word NULL, so a field that is
# -- missing in the BibTeX entry is emitted as NULL.
%dbhash = map { ( $_ => 'NULL' ) } qw(
    cite      author       editor       title   booktitle
    chapter   journal      year         month   institution
    organization           school       volume  number
    series    edition      pages        publisher
    address   howpublished key          crossref
    note      annote       type         owner   lang
);


# -- Main conversion loop.
# -- The input is read line by line.  The lines are glued together in $nline
# -- until the number of opening and closing braces is the same, which means
# -- that one complete entry "@type{citekey, field = value, ...}" has been
# -- collected.  Every comma that terminates a field is replaced by the
# -- marker LASTKOMMA first, so that commas inside a value survive the split
# -- into fields.

# -- Order of the values in the generated VALUES (...) list
my @columns = qw(cite author editor title booktitle chapter journal year
                 month institution organization school volume number series
                 edition pages publisher address howpublished key crossref
                 note annote type owner lang);

while ( $line = <IFILE> ) {
  # -- Lines starting with '%' are comments
  next if substr($line,0,1) eq "%";

  # -- Remove the line terminator (LF or CRLF), and only the terminator:
  # -- a last line without a newline must keep its final character
  $line =~ s/\r?\n\z//;
  # -- Tabs become blanks (runs of blanks are collapsed further down), so
  # -- words on tab-indented continuation lines are not glued together
  $line =~ s/\t/ /g;
  # -- Remove blanks at the end of the line
  $line =~ s/[ ]+$//g;
  # -- A comma directly after a closing brace terminates a field
  $line =~ s/\},$/\}LASTKOMMA/g;

  my $braces = ( $line =~ tr/\{// );
  my $equals = ( $line =~ tr/=// );
  # -- "key = value" without braces: the comma at the end terminates the field
  my $bare_field = ( $braces == 0 and $equals != 0 );
  # -- An '@' that is not in the first column: drop the leading blanks
  if ( not $bare_field and $line !~ /^@/ and $line =~ /^.+@/ ) {
    $line =~ s/^[ ]+//;
  }
  # -- On the first line of an entry the comma follows the cite key, so it
  # -- terminates a field, too
  if ( $bare_field or $line =~ /^@/ ) {
    $line =~ s/,$/LASTKOMMA/g;
  }

  # -- Append the line to the lines collected so far
  $nline = $nline.$line;
  # -- Replace runs of blanks by a single blank in the whole string
  $nline =~ s/[ ]+/ /g;
  # -- A field terminator directly before a closing brace is not needed
  $nline =~ s/LASTKOMMA\}/\}/g;

  # -- The entry is complete when the braces are balanced
  my $opening = ( $nline =~ tr/\{// );
  my $closing = ( $nline =~ tr/\}// );
  next unless ( $opening == $closing and $opening != 0 );

  my $entry = $nline;
  $nline = "";

  # -- Expected form: @type{body}.  Text in front of the '@' is ignored.
  # -- The match must be checked: after a failed match $1 and $2 would still
  # -- hold the values of the previous entry.
  unless ( $entry =~ /^[^@]*@(.+?)\{(.+?)\}$/ ) {
    warn "$infile, line $.: skipped text that is not an entry: $entry\n";
    next;
  }
  my $type = lc($1);
  my $body = $2;

  $dbhash{type}  = "\'$type\'";
  $dbhash{owner} = "\'$owner\'";

  # -- The body starts with the cite key, which is handled like a field
  # -- named "cite"
  foreach my $field ( split(/LASTKOMMA/, "cite = ".$body) ) {
    # -- Split at the FIRST '=' only, the value may contain '=' itself
    my ($name, $raw) = split(/=/, $field, 2);
    # -- Fragments without '=' are not fields
    next unless defined $raw;
    $name =~ s/ //g;
    $name = lc($name);
    next if $name eq "";

    my $value = escape_field_value($raw);

    # -- NOTE(review): a BibTeX field that is itself called "type", "owner"
    # -- or "cite" replaces the value set above -- confirm this is intended
    if ( $name eq "year" ) {
      # -- Numeric years are written as numbers, anything else is quoted
      # -- so that the statement stays valid SQL
      $dbhash{year} = ( $value =~ /^\d+$/ ) ? $value : "\'$value\'";
    } elsif ( $name eq "language" ) {
      # -- The column for the BibTeX field "language" is called "lang"
      $dbhash{lang} = "\'$value\'";
    } else {
      $dbhash{$name} = "\'$value\'";
    }
  }

  print OFILE "$startpart VALUES (" . join(",", @dbhash{@columns}) . "); \n";

  # -- Start the next entry with all columns set to NULL again
  $dbhash{$_} = 'NULL' foreach keys(%dbhash);
}

# -- Unbalanced braces swallow everything up to the end of the file
if ( $nline =~ /[\{@]/ ) {
  warn "$infile: incomplete entry at the end of the file was not converted\n";
}

# -- escape_field_value(RAW)
# -- Turns the raw text right of the '=' of a field into the text that is
# -- put between single quotes in the SQL statement:
# --   * one blank at each end and the enclosing "..." or {...} are removed
# --   * backslashes and single quotes are doubled
# --   * braces around a single character are removed
# --   * '&' is written as \&, German umlauts and sharp s in TeX notation
# -- Returns the converted text.
sub escape_field_value {
  my ($text) = @_;

  $text =~ s/^ //;
  $text =~ s/ $//;
  if ( $text =~ /^"(.+?)"$/ ) {
    $text = $1;
  } elsif ( $text =~ /^\{+(.+?)\}+$/ ) {
    # -- All braces at both ends were stripped, which may have removed
    # -- one that belongs to the content: put it back if the rest is not
    # -- balanced
    $text = $1;
    my $opening = ( $text =~ tr/\{// );
    my $closing = ( $text =~ tr/\}// );
    if ( $opening > $closing ) {
      $text = "$text\}";
    } elsif ( $opening < $closing ) {
      $text = "\{$text";
    }
  }

  # -- Escape backslashes and single quotes for the SQL string literal
  $text =~ s/\\/\\\\/g;
  $text =~ s/\'/\'\'/g;
  # -- {X} -> X for single characters
  $text =~ s/(.+?)\{(.)\}(.+?)/$1$2$3/g;
  $text =~ s/^\{(.)\}(.+?)/$1$2/g;
  # -- {\"X} -> \"X (the backslash has already been doubled above)
  $text =~ s/\{\\\\"(.)\}/\\\\"$1/g;

  # -- Escape '&' unless the text contains an escaped one already
  if ( $text =~ /&/ and $text !~ /\\&/ ) {
    $text =~ s/&/\\&/g;
  }
  # -- "a style umlauts get a backslash unless the text contains a
  # -- backslashed one already
  if ( $text =~ /\"a|\"A|\"o|\"O|\"u|\"U/
       and $text !~ /\\"a|\\"A|\\"o|\\"O|\\"u|\\"U/ ) {
    $text =~ s/\"([aAoOuU])/\\\\"$1/g;
  }
  # -- Literal umlauts and sharp s
  $text =~ s/ä/\\\\"a/g;
  $text =~ s/Ä/\\\\"A/g;
  $text =~ s/ö/\\\\"o/g;
  $text =~ s/Ö/\\\\"O/g;
  $text =~ s/ü/\\\\"u/g;
  $text =~ s/Ü/\\\\"U/g;
  $text =~ s/ß/\{\\\\ss\}/g;

  return $text;
}

close(IFILE);
# -- Output is buffered, so a failed write (e.g. disk full) may only show up
# -- when the file is closed
close(OFILE) or die "Error while writing '$outfile': $!\n";
