Gitweb links:
...log
http://git.netsurf-browser.org/libnspsl.git/shortlog/cf5b82a3b3e8f424a87aed671259fc3546b5ea9a
...commit
http://git.netsurf-browser.org/libnspsl.git/commit/cf5b82a3b3e8f424a87aed671259fc3546b5ea9a
...tree
http://git.netsurf-browser.org/libnspsl.git/tree/cf5b82a3b3e8f424a87aed671259fc3546b5ea9a
The branch, master has been updated
via cf5b82a3b3e8f424a87aed671259fc3546b5ea9a (commit)
via e300bcf30dd0550ffce00e741ff0198e0fb8955f (commit)
from 0deeebf9d637750b4957575baee45c5bc13edc1d (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff
http://git.netsurf-browser.org/libnspsl.git/commit/?id=cf5b82a3b3e8f424a87aed671259fc3546b5ea9a
commit cf5b82a3b3e8f424a87aed671259fc3546b5ea9a
Author: Vincent Sanders <[email protected]>
Commit: Vincent Sanders <[email protected]>
generation tool now adds some basic statistics to output
diff --git a/src/genpubsuffix.pl b/src/genpubsuffix.pl
index 7bc29a8..47b50ba 100644
--- a/src/genpubsuffix.pl
+++ b/src/genpubsuffix.pl
@@ -138,30 +138,39 @@ sub generate_string_table
{
my ($tldtree_ref, $nodeidx_ref, $strtab_ref, $stridx_ref) = @_;
+ # obtain sorted array of domain label strings
+ my $labcount = 0; # total number of labels
+ my $labsize = 0; # total size of labels
my @tmp_array;
-
- foreach my $keys (keys %$strtab_ref) {
- push(@tmp_array, $keys);
+ foreach my $key (keys %$strtab_ref) {
+ use bytes;
+ push(@tmp_array, $key);
+ $labcount += 1;
+ $labsize += length($key);
}
-
my @domelem_array = sort { length($b) <=> length($a) } @tmp_array;
my $stringtable = "*!"; # table being generated
my $stringtablesize = 2;
+ my $labfullcount = 2; # labels inserted into the table in full
for my $domelem (@domelem_array) {
my $substridx = index($stringtable, $domelem);
if ($substridx != -1) {
# found existing string match so use it
$strtab_ref->{$domelem} = $substridx;
} else {
+ # no existing string match to put complete label into table
+ use bytes;
$strtab_ref->{$domelem} = $stringtablesize;
$stringtable .= $domelem;
- {
- use bytes;
- $stringtablesize += length($domelem);
- }
+ $stringtablesize += length($domelem);
+ $labfullcount += 1;
}
}
+ print "/**\n";
+ print " * Domain label string table.\n";
+ print " * " . $labcount . " labels(" . $labsize . " bytes) reduced to " . $labfullcount . " labels(" . $stringtablesize . " bytes)\n";
+ print " */\n";
print "static const char stab[" . $stringtablesize . "] = {\n";
print " " . phexstr($stringtable);
print "};\n\n";
@@ -187,7 +196,7 @@ sub pstr_len
# those updating optidx to point to the next free node
sub calc_pnode
{
- my ($parent_ref, $strtab_ref, $opidx_ref) = @_;
+ my ($parent_ref, $strtab_ref, $opidx_ref, $nodecount_ref) = @_;
my $our_dat;
my $child_dat = "";
my $startidx = $$opidx_ref;
@@ -216,6 +225,8 @@ sub calc_pnode
while ( my ($cdom, $cref) = each(%$parent_ref) ) {
my $child_count = scalar keys (%$cref);
+ $$nodecount_ref += 1; # keep count of number of nodes in tree
+
$our_dat .= " { ";
$our_dat .= ".label = {" . $strtab_ref->{$cdom} . ", ". pstr_len($cdom) ;
if ($child_count == 0) {
@@ -225,7 +236,7 @@ sub calc_pnode
# complete label with children
$our_dat .= ", 1 } }, ";
$our_dat .= "{ .child = { " . $$opidx_ref . ", " . $child_count . " } },\n";
- $child_dat .= calc_pnode($cref, $strtab_ref, $opidx_ref);
+ $child_dat .= calc_pnode($cref, $strtab_ref, $opidx_ref, $nodecount_ref);
}
}
@@ -281,13 +292,27 @@ print " * From file " . basename($filename) . "\n";
print " * Converted on " . localtime() . "\n";
print " */\n\n";
-generate_string_table(\%tldtree, \$nodeidx, \%strtab, \$stridx);
-
print "enum stab_entities {\n";
print " STAB_WILDCARD = 0,\n";
print " STAB_EXCEPTION = 1\n";
print "};\n\n";
+print "/**\n";
+print " * Prefix suffix list graph node\n";
+print " */\n";
+print "union pnode {\n";
+print " struct {\n";
+print " uint16_t idx; /**< index of domain element in string table */\n";
+print " uint8_t len; /**< length of domain element in string table */\n";
+print " uint8_t children; /**< has children */\n";
+print " } label;\n";
+print " struct {\n";
+print " uint16_t index; /**< index of first child node */\n";
+print " uint16_t count; /**< number of children of this node */\n";
+print " } child;\n";
+print "};\n\n";
+
+generate_string_table(\%tldtree, \$nodeidx, \%strtab, \$stridx);
# output static node array
#
@@ -302,33 +327,21 @@ print "};\n\n";
# As labels cannot be more than 63 characters a byte length is more
# than sufficient.
-
my $opidx = 2; # output index of node
my $opnodes = ""; # output pnode initialisers
+my $opnodecount = 1; # output domain label nodes
# root node initialiser
$opnodes .= " /* root entry */\n";
$opnodes .= " { .label = { 0, 0, 1 } }, { .child = { " . $opidx . ", " . scalar keys(%tldtree) . " } },";
# generate node initialiser
-$opnodes .= calc_pnode(\%tldtree, \%strtab, \$opidx);
-
-
-print "union pnode {\n";
-print " struct {\n";
-print " uint16_t idx; /**< index of domain element in string table */\n";
-print " uint8_t len; /**< length of domain element in string table */\n";
-print " uint8_t children; /**< has children */\n";
-print " } label;\n";
-print " struct {\n";
-print " uint16_t index; /**< index of first child node */\n";
-print " uint16_t count; /* number of children of this node */\n";
-print " } child;\n";
-print "};\n\n";
+$opnodes .= calc_pnode(\%tldtree, \%strtab, \$opidx, \$opnodecount);
+print "/**\n";
+print " * PSL represented as a directed acyclic graph\n";
+print " * There are " . $opnodecount . " labels in " . $opidx . " nodes\n";
+print " */\n";
print "static const union pnode pnodes[" . $opidx . "] = {\n";
-
-# output node initialisors
-print $opnodes;
-
+print $opnodes; # output node initialisors
print "\n};\n\n";
commitdiff
http://git.netsurf-browser.org/libnspsl.git/commit/?id=e300bcf30dd0550ffce00e741ff0198e0fb8955f
commit e300bcf30dd0550ffce00e741ff0198e0fb8955f
Author: Vincent Sanders <[email protected]>
Commit: Vincent Sanders <[email protected]>
add dependancy on generation tool
diff --git a/src/Makefile b/src/Makefile
index 35aba13..6f8c173 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,7 +2,7 @@ DIR_SOURCES := nspsl.c
src/nspsl.c:src/psl.inc
-src/psl.inc:public_suffix_list.dat
+src/psl.inc:public_suffix_list.dat src/genpubsuffix.pl
perl src/genpubsuffix.pl $< > $@
include $(NSBUILD)/Makefile.subdir
-----------------------------------------------------------------------
Summary of changes:
src/Makefile | 2 +-
src/genpubsuffix.pl | 75 ++++++++++++++++++++++++++++++---------------------
2 files changed, 45 insertions(+), 32 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index 35aba13..6f8c173 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,7 +2,7 @@ DIR_SOURCES := nspsl.c
src/nspsl.c:src/psl.inc
-src/psl.inc:public_suffix_list.dat
+src/psl.inc:public_suffix_list.dat src/genpubsuffix.pl
perl src/genpubsuffix.pl $< > $@
include $(NSBUILD)/Makefile.subdir
diff --git a/src/genpubsuffix.pl b/src/genpubsuffix.pl
index 7bc29a8..47b50ba 100644
--- a/src/genpubsuffix.pl
+++ b/src/genpubsuffix.pl
@@ -138,30 +138,39 @@ sub generate_string_table
{
my ($tldtree_ref, $nodeidx_ref, $strtab_ref, $stridx_ref) = @_;
+ # obtain sorted array of domain label strings
+ my $labcount = 0; # total number of labels
+ my $labsize = 0; # total size of labels
my @tmp_array;
-
- foreach my $keys (keys %$strtab_ref) {
- push(@tmp_array, $keys);
+ foreach my $key (keys %$strtab_ref) {
+ use bytes;
+ push(@tmp_array, $key);
+ $labcount += 1;
+ $labsize += length($key);
}
-
my @domelem_array = sort { length($b) <=> length($a) } @tmp_array;
my $stringtable = "*!"; # table being generated
my $stringtablesize = 2;
+ my $labfullcount = 2; # labels inserted into the table in full
for my $domelem (@domelem_array) {
my $substridx = index($stringtable, $domelem);
if ($substridx != -1) {
# found existing string match so use it
$strtab_ref->{$domelem} = $substridx;
} else {
+ # no existing string match to put complete label into table
+ use bytes;
$strtab_ref->{$domelem} = $stringtablesize;
$stringtable .= $domelem;
- {
- use bytes;
- $stringtablesize += length($domelem);
- }
+ $stringtablesize += length($domelem);
+ $labfullcount += 1;
}
}
+ print "/**\n";
+ print " * Domain label string table.\n";
+ print " * " . $labcount . " labels(" . $labsize . " bytes) reduced to " . $labfullcount . " labels(" . $stringtablesize . " bytes)\n";
+ print " */\n";
print "static const char stab[" . $stringtablesize . "] = {\n";
print " " . phexstr($stringtable);
print "};\n\n";
@@ -187,7 +196,7 @@ sub pstr_len
# those updating optidx to point to the next free node
sub calc_pnode
{
- my ($parent_ref, $strtab_ref, $opidx_ref) = @_;
+ my ($parent_ref, $strtab_ref, $opidx_ref, $nodecount_ref) = @_;
my $our_dat;
my $child_dat = "";
my $startidx = $$opidx_ref;
@@ -216,6 +225,8 @@ sub calc_pnode
while ( my ($cdom, $cref) = each(%$parent_ref) ) {
my $child_count = scalar keys (%$cref);
+ $$nodecount_ref += 1; # keep count of number of nodes in tree
+
$our_dat .= " { ";
$our_dat .= ".label = {" . $strtab_ref->{$cdom} . ", ". pstr_len($cdom) ;
if ($child_count == 0) {
@@ -225,7 +236,7 @@ sub calc_pnode
# complete label with children
$our_dat .= ", 1 } }, ";
$our_dat .= "{ .child = { " . $$opidx_ref . ", " . $child_count . " } },\n";
- $child_dat .= calc_pnode($cref, $strtab_ref, $opidx_ref);
+ $child_dat .= calc_pnode($cref, $strtab_ref, $opidx_ref, $nodecount_ref);
}
}
@@ -281,13 +292,27 @@ print " * From file " . basename($filename) . "\n";
print " * Converted on " . localtime() . "\n";
print " */\n\n";
-generate_string_table(\%tldtree, \$nodeidx, \%strtab, \$stridx);
-
print "enum stab_entities {\n";
print " STAB_WILDCARD = 0,\n";
print " STAB_EXCEPTION = 1\n";
print "};\n\n";
+print "/**\n";
+print " * Prefix suffix list graph node\n";
+print " */\n";
+print "union pnode {\n";
+print " struct {\n";
+print " uint16_t idx; /**< index of domain element in string table */\n";
+print " uint8_t len; /**< length of domain element in string table */\n";
+print " uint8_t children; /**< has children */\n";
+print " } label;\n";
+print " struct {\n";
+print " uint16_t index; /**< index of first child node */\n";
+print " uint16_t count; /**< number of children of this node */\n";
+print " } child;\n";
+print "};\n\n";
+
+generate_string_table(\%tldtree, \$nodeidx, \%strtab, \$stridx);
# output static node array
#
@@ -302,33 +327,21 @@ print "};\n\n";
# As labels cannot be more than 63 characters a byte length is more
# than sufficient.
-
my $opidx = 2; # output index of node
my $opnodes = ""; # output pnode initialisers
+my $opnodecount = 1; # output domain label nodes
# root node initialiser
$opnodes .= " /* root entry */\n";
$opnodes .= " { .label = { 0, 0, 1 } }, { .child = { " . $opidx . ", " . scalar keys(%tldtree) . " } },";
# generate node initialiser
-$opnodes .= calc_pnode(\%tldtree, \%strtab, \$opidx);
-
-
-print "union pnode {\n";
-print " struct {\n";
-print " uint16_t idx; /**< index of domain element in string table */\n";
-print " uint8_t len; /**< length of domain element in string table */\n";
-print " uint8_t children; /**< has children */\n";
-print " } label;\n";
-print " struct {\n";
-print " uint16_t index; /**< index of first child node */\n";
-print " uint16_t count; /* number of children of this node */\n";
-print " } child;\n";
-print "};\n\n";
+$opnodes .= calc_pnode(\%tldtree, \%strtab, \$opidx, \$opnodecount);
+print "/**\n";
+print " * PSL represented as a directed acyclic graph\n";
+print " * There are " . $opnodecount . " labels in " . $opidx . " nodes\n";
+print " */\n";
print "static const union pnode pnodes[" . $opidx . "] = {\n";
-
-# output node initialisors
-print $opnodes;
-
+print $opnodes; # output node initialisors
print "\n};\n\n";
--
NetSurf Public Suffix List - Handling library
_______________________________________________
netsurf-commits mailing list
[email protected]
http://listmaster.pepperfish.net/cgi-bin/mailman/listinfo/netsurf-commits-netsurf-browser.org