commit 7d8c1fcf644bd921736f61f8f4461255e4325cbb
Author: Kornel Benko <[email protected]>
Date: Wed Oct 30 11:08:31 2024 +0100
Cmake url tests: try to check also some modified urls
If the url contains 'ctan', try to determine the correct url depending
on its components, to decide whether to use 'https://www.ctan.org' or rather
'https://mirrors.ctan.org'.
Without 'ctan', try to check https:// instead of ftp:// or http://.
---
development/checkurls/CheckURL.pm | 58 ++++++++++++++++++++++++++++++-
development/checkurls/knownToRegisterURLS | 1 -
development/checkurls/search_url.pl | 45 +++++++++++++++---------
3 files changed, 85 insertions(+), 19 deletions(-)
diff --git a/development/checkurls/CheckURL.pm
b/development/checkurls/CheckURL.pm
index 15839410f3..b2eec3adfa 100755
--- a/development/checkurls/CheckURL.pm
+++ b/development/checkurls/CheckURL.pm
@@ -19,7 +19,7 @@ our (@EXPORT, @ISA);
BEGIN {
use Exporter ();
@ISA = qw(Exporter);
- @EXPORT = qw(check_url);
+ @EXPORT = qw(check_url constructExtraTestUrl);
}
# Prototypes
@@ -260,6 +260,7 @@ sub check_unknown_url($$$$) {
# Main entry
sub check_url($$$$) {
my ($url, $use_curl, $fex, $fsx) = @_;
+ $url =~ s/%20/ /g;
$fe = $fex;
$fs = $fsx;
my $file = undef;
@@ -308,4 +309,59 @@ sub check_url($$$$) {
}
}
+# Given a url, construct an alternative ("extra") url worth testing:
+# upgrade http:// (and ftp:// for ctan hosts) to https://, and for ctan
+# servers normalize the path and pick www.ctan.org vs mirrors.ctan.org.
+# Returns the (possibly unchanged) candidate url.
+sub constructExtraTestUrl($) {
+  my ($url) = @_;
+
+  my $urlok = $url;
+  # Keep $protokol defined even when the url has no recognized scheme,
+  # so the sprintf below does not warn under 'use warnings'.
+  my $protokol = '';
+  if ($urlok =~ s/^(ftp|https?):\/\///) {
+    $protokol = $1;
+    if ($protokol eq 'http') {
+      $protokol = 'https';
+    }
+    # ctan mirrors serve the same tree over https; prefer it to ftp.
+    if (($protokol eq 'ftp') && ($urlok =~ /ctan/)) {
+      $protokol = 'https';
+    }
+  }
+  $urlok =~ s/^([^\/]+)//;
+  my $server = $1;
+  $urlok =~ s/^\///;
+  if ($server =~ /ctan/) {
+    $urlok =~ s/\/\/+/\//g;    # collapse duplicated slashes
+    $urlok =~ s/^ctan\///;
+    if ($urlok =~ /[\w][.](pdf|html|dvi)$/) {
+      # Documents live on the mirrors, package pages on www.
+      if ($urlok =~ s/^(tex-archive|CTAN)\///) {
+        $server = 'mirrors.ctan.org';
+      }
+      elsif ($urlok =~ /(pgf)\//) {
+        $server = 'www.ctan.org';
+      }
+    }
+    else {
+      if ($urlok =~ s/\/$//) {
+        # was 'www.cpan.org': typo — CPAN is Perl's archive, this file checks CTAN
+        $server = 'www.ctan.org';
+        if ($urlok ne '') {
+          # Top-level archive directories are reachable under tex-archive/.
+          if ("$urlok/" =~ /^(biblio|bibliography|digest|documentation|dviware|fonts|graphics|help|indexing|info|install|languages?|macros|obsolete|support|systems|tds|usergrps|web)\//) {
+            $urlok = 'tex-archive/' . $urlok;
+          }
+          if ("$urlok/" !~ /^(pkg|topic|tex-archive|author)\//) {
+            die("constructExtraTestUrl: unexpected ctan path '$urlok' in url '$url'");
+          }
+        }
+      }
+    }
+  }
+  my $url2;
+  if ($urlok eq '') {
+    $url2 = "$protokol://$server";
+  }
+  else {
+    $url2 = "$protokol://$server/$urlok";
+  }
+  return ($url2);
+}
+
1;
diff --git a/development/checkurls/knownToRegisterURLS
b/development/checkurls/knownToRegisterURLS
index 6d75c27446..80e2e9d4c2 100644
--- a/development/checkurls/knownToRegisterURLS
+++ b/development/checkurls/knownToRegisterURLS
@@ -11,7 +11,6 @@
https://texample.net/media/tikz/examples/TEX/free-body-diagrams.tex
# Urls probably exist, but to check
# we need to register and login first
-http://www.issn.org/en/node/344
http://www.springer.de/author/tex/help-journals.html
http://www.wkap.nl/jrnllist.htm/JRNLHOME
http://www.wkap.nl/kaphtml.htm/STYLEFILES
diff --git a/development/checkurls/search_url.pl
b/development/checkurls/search_url.pl
index 24bc275a7f..a1a7497fb3 100755
--- a/development/checkurls/search_url.pl
+++ b/development/checkurls/search_url.pl
@@ -30,6 +30,7 @@
# (c) 2013 Scott Kostyshak <[email protected]>
use strict;
+use warnings;
BEGIN {
use File::Spec;
@@ -38,7 +39,6 @@ BEGIN {
unshift(@INC, "$p");
}
-use warnings;
use Cwd qw(abs_path);
use CheckURL;
use Try::Tiny;
@@ -46,6 +46,8 @@ use locale;
use POSIX qw(locale_h);
use Readonly;
+binmode(STDOUT, ":encoding(UTF-8)");
+
Readonly::Scalar my $NR_JOBS => 10;
setlocale(LC_CTYPE, "");
@@ -71,6 +73,7 @@ my %revertedURLS = ();
my %extraURLS = ();
my %selectedURLS = ();
my %knownToRegisterURLS = ();
+my %extraTestURLS = ();
my $summaryFile = undef;
my $checkSelectedOnly = 0;
@@ -80,7 +83,7 @@ for my $arg (@ARGV) {
if ($type eq "filesToScan") {
#The file should be a list of files to search in
- if (open(FLIST, $val)) {
+ if (open(FLIST, '<', $val)) {
while (my $l = <FLIST>) {
chomp($l);
parse_file($l);
@@ -105,7 +108,7 @@ for my $arg (@ARGV) {
readUrls($val, %knownToRegisterURLS);
}
elsif ($type eq "summaryFile") {
- if (open(SFO, '>', "$val")) {
+ if (open(SFO, '>:encoding(UTF8)', "$val")) {
$summaryFile = $val;
}
}
@@ -143,10 +146,15 @@ for my $u (@urls) {
next if ($checkSelectedOnly && !defined($selectedURLS{$u}));
$URLScount++;
push(@testvals, {u => $u, use_curl => $use_curl,});
- if ($u =~ s/^http:/https:/) {
- if (!defined($selectedURLS{$u})) { # check also the corresponging
'https:' url
- push(@testvals, {u => $u, use_curl => $use_curl, extra => 1,});
- $URLScount++;
+ my $uorig = $u;
+ $u = constructExtraTestUrl($uorig);
+ if ($u ne $uorig) {
+ if (!defined($selectedURLS{$u})) {
+ if (!defined($extraTestURLS{$u})) {
+ $extraTestURLS{$u} = 1; # omit multiple tests
+ push(@testvals, {u => $u, use_curl => $use_curl, extra => 1});
+ $URLScount++;
+ }
}
}
}
@@ -206,7 +214,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) { # Number of
subprocesses
my $use_curl = $rentry->{use_curl};
my $extra = defined($rentry->{extra});
- print $fe "Checking($entryidx-$subprocess) '$u': ";
+ print $fe "Checking($entryidx-$subprocess) '$u': time=" . time() . ' ';
my ($res, $prnt, $outSum);
try {
$res = check_url($u, $use_curl, $fe, $fs);
@@ -248,6 +256,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) { # Number of
subprocesses
else {
my $succes;
if ($extra) {
+ # This url is created
$succes = "Extra_OK url:";
}
else {
@@ -274,7 +283,7 @@ for (my $i = 0; $i < $NR_JOBS; $i++) { # Number of
subprocesses
sub readsublog($) {
my ($i) = @_;
- open(my $fe, '<', "$tempdir/xxxError$i");
+ open(my $fe, '<:encoding(UTF-8)', "$tempdir/xxxError$i");
while (my $l = <$fe>) {
if ($l =~ /^NumberOfErrors\s(\d+)/) {
$errorcount += $1;
@@ -345,29 +354,32 @@ sub printNotUsedURLS($\%) {
}
}
if (@msg) {
- print "\n$txt URLs not found in sources: " . join(' ', @msg) . "\n";
+ print "\n$txt URLs: " . join(' ', @msg) . "\n";
}
}
sub replaceSpecialChar($) {
my ($l) = @_;
$l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)[\s]?/$2/;
+ $l =~ s/ /%20/g;
return ($l);
}
sub readUrls($\%) {
my ($file, $rUrls) = @_;
- die("Could not read file $file") if (!open(ULIST, $file));
+ die("Could not read file $file") if (!open(ULIST, '<:encoding(UTF-8)',
$file));
+ print "Read urls from $file\n";
my $line = 0;
while (my $l = <ULIST>) {
$line++;
- $l =~ s/[\r\n]+$//; # remove eol
- $l =~ s/\s*\#.*$//; # remove comment
- $l = &replaceSpecialChar($l);
+ chomp($l); # remove eol
+ $l =~ s/^\s+//;
+ next if ($l =~ /^\#/); # discard comment lines
next if ($l eq "");
+ $l = &replaceSpecialChar($l);
my $use_curl = 0;
- if ($l =~ s/^\s*UseCurl\s*//) {
+ if ($l =~ s/^UseCurl\s*//) {
$use_curl = 1;
}
if (!defined($rUrls->{$l})) {
@@ -382,13 +394,12 @@ sub parse_file($) {
my $status = "out"; # outside of URL/href
#return if ($f =~ /\/attic\//);
- if (open(FI, $f)) {
+ if (open(FI, '<:encoding(UTF-8)', $f)) {
my $line = 0;
while (my $l = <FI>) {
$line++;
chomp($l);
- # $l =~ s/[\r\n]+$//; # Simulate chomp
if ($status eq "out") {
# searching for "\begin_inset Flex URL"
--
lyx-cvs mailing list
[email protected]
https://lists.lyx.org/mailman/listinfo/lyx-cvs