Hello folks I've some troubles with a perlscript that you can see below. The problem is that some german special characters (umlaut) are not displayed as they should be. This seems to be an encoding-issue. Either the internal perl variables have the wrong encoding or the lwp-module when grapping the html? Additional the output to write the data into the MS-Access DB can also have the incorrect encoding. How can we fix the uncertainty? What can I do to ensure the right encoding in all levels?
Thanks a lot in advanced! Johannes Here comes the script: #!C:\Program Files\Perl\bin\perl.exe -w use strict; use LWP::UserAgent; use Win32::ODBC; my $db = new Win32::ODBC('PerlRes') ; my($inhalt, $detail, @compInfo, $datum, $headline, $company, $message, $content, $ua, $request, $response, $ua2, $request2, $response2); for(my $i = 1; $i < 2; $i++) { $ua = LWP::UserAgent->new(); $request = HTTP::Request->new('GET', "http://www.dgap.de/dgap/static/News/?newsType=ADHOC&page=" . $i . "&limit=20"); $request->header('Content-Type' => 'text/html; charset=iso-8859-1'); $response = $ua->request($request); $inhalt = $response->content; while($inhalt =~ /alt="DGAP-Ad-hoc" \/>\s+?<\/td>\s+?<td class="content_text">\s+?<a href="(.+)">\s+?<strong>/g) { $ua2 = LWP::UserAgent->new(); $request2 = HTTP::Request->new('GET', $1); $request2->header('Content-Type' => 'text/html; charset=iso-8859-1'); $response2 = $ua->request($request2); $detail = $response2->content; if($detail =~ /news_content ">\s+?<h2 class="darkblue">\s+?(.+)\s+?<\/h2>/) { $datum = $1; $datum =~ s/\s*//; $datum =~ s/\s+?$//g; } if($detail =~ /<h2 class="darkblue">\s+?.+?\s+?<\/h2>\s+?<div>\s+?<h1>(.+)<\/h1>/) { $headline = $1; $headline =~ s/;/|/g; $headline =~ s/\n//g; } if($detail =~ /<div class="newsDetail_body_pre"><pre>\s+?<b>(.+)<\/b>/) { @compInfo = split("/",$1); $company = $compInfo[0]; $company =~ s/\n//g; $message = $compInfo[1]; $message =~ s/\s//g; } if($detail =~ /<pre>(.+)<\/pre>/s) { $content = $1; $content =~ s/;/|/g; $content =~ s/<\/?.+?>//g; } $db->Sql("INSERT into results VALUES('" . $datum . "','" . $headline . "','" . $company . "','" . $message . "','" . $content . "')"); } $db->Close(); }