#!/usr/local/bin/perl -w
#
# longdup.pl
# Prints the longest duplicate string of a file.
#
# By Buggs, buggs@splashground.de, 2001.
# Use under the same terms as Perl5.
# 
# Based on Jon Bentley's C code.
# See Chapter 15,"Programming Pearls", 2nd Edition, 2000, by Jon Bentley
# and ""Suffix Arrays", Dr. Dobb's Journal", p. 145ff, Issue 323 April 2001.
#

# how many matches?
use constant M => 1;

# remember filename
$filename = $ARGV[0];

# slurp file
$text .= $line while($line = <>);

# sorted list of substring indices 
$n = length($text);
@a = sort {substr($text,$a) cmp substr($text,$b)} (0..$n);

$maxlen = -1;
for($i = 0; $i < $n - M; $i++)
{ 
	$len = comlen($a[$i], $a[$i + M]);
	
	# longer? Then save it.
	if( $len > $maxlen )
	{
		$maxlen = $len;
		$maxi = $i;
	}
}

$offset1 = $a[$maxi];      $end1 = $offset1 + $maxlen;
$offset2 = $a[$maxi + M];  $end2 = $offset2 + $maxlen;

print
  "--- $filename --- Bytes($offset1,$end1) eq Bytes($offset2,$end2):\n",
  substr($text,$offset1,$maxlen);

  
sub comlen
#
# Return length of common substring.
# 
{
	my $j;
	my $p = substr($text,$_[0]);
	my $q = substr($text,$_[1]);
	
	for($j = 0;$j < length($p);$j++)
	{
		last if(substr($p,$j,1) ne substr($q,$j,1));
	}
	return $j;
}


