Thanks!
Hein van den Heuvel
To test I created a 5GB file using 50,000,000 records of 99bytes + 1
newline. Do NOT use default IO. Takes too long!
Pre-alloated of course. And use sqo to prevent high-water marking
going crazy, 4 large buffers is enough (only ever saw 2 being written
from, while an other 1 w as being filled).
--------------------- create_big.pl ------------------
use VMS::Stdio (qw(&vmsopen));
$fh = vmsopen(qw(>big.tmp alq=10000000 fop=sqo ctx=rec mbc=112 mbf=4 rop=wbh));
printf $fh qq(%09d%90s\n),$_,q(*) foreach (1..50000000);
-------------------- test_big.pl -----------------
use VMS::Stdio (qw(&vmsopen));
use filehandle;
my $i = 0;
my $records = 0;
my $position;
my @positions;
my $fh = vmsopen(qw(<big.tmp fop=sqo ctx=rec mbf=8 rop=rah)) or die "$!";
while (<$fh>) {
# real work goes here...
$records++; # just count for now.
if (!($records % 1000000)) {
$position = $fh->getpos;
my ($a,$b) = unpack('LL',$position);
printf "$records records at (%0X,%0X) %s\n", $a, $b, substr($_,0,20);
$positions[++$i] = $position;
}
}
print "$records records.\n";
undef $fh;
my $fh = vmsopen(qw(<big.tmp ctx=rec mbf=1 mbc=1)) or die "$!";
foreach (1 .. $i) {
$fh->setpos($positions[$_]);
$line = <$fh>;
printf ("%4d %s\n", $_, substr ($line,0,20))
}
I also wrote a much similar program using C, give or take an off-by-one.
#include <stdio.h>
#include <stdlib.h>
#include <rms.h>
struct RAB *rab;
int acc_callback(int *first_arg, struct FAB *fab, struct RAB *the_rab) {
rab = the_rab; // stash it away
return 0;
}
int main(int argc, char *argv[]) {
FILE *fp;
int stat, i=0, j, records=0, *p;
char buffer[2048], ch;
fpos_t positions[100];
if ((fp = fopen(argv[1], "r", "acc", &acc_callback, 0,
"fop=sqo", "ctx=rec", "mbf=8", "mbc=112", "rop=rah")) == NULL) {
perror("open");
exit(1);
}
while ( fgets(buffer, sizeof buffer, fp)) {
records++;
if (!(records%1000000)) {
if (fgetpos(fp, &positions[i]) != 0) {
perror("fgetpos");
exit(1);
}
buffer[20]=0;
p = (void *) &positions[i]; // Helper for opaque item hex printout
printf ("%5d, pos: %08x %08x rfa: %08x,%04x %s\n",
i, p[0], p[1], rab->rab$l_rfa0, rab->rab$w_rfa4,buffer);
if (++i >= 100) break; // Emergency exit
}
}
fclose(fp);
if ((fp = fopen(argv[1], "r", "ctx=rec", "mbf=1", "mbc=1")) == NULL) {
perror("open");
exit(1);
}
for (j = 0; j<i; j++) {
if (fsetpos(fp, &positions[j]) != 0) {
perror("fsetpos");
exit(1);
}
fgets(buffer, sizeof buffer, fp);
buffer[20]=0;
printf ("%5d, %s\n", j, buffer);
}
}
Here are the test outputs, with the perl and C results merged, and a
little manual column aligning:
---------- perl result ---------- C-rtl getpos RMS RFA
1000000 records at ( 5F5E100,0) 001000000 0002faf0 00000100 0002faf1,0100
2000000 records at ( BEBC200,0) 002000000 0005f5e1 00000000 0005f5e2,0000
3000000 records at (11E1A300,0) 003000000 0008f0d1 00000100 0008f0d2,0100
:
21000000 records at (7D2B7500,0) 021000000 003e95ba 00000100 003e95bb,0100
22000000 records at (83215600,0) 022000000 004190ab 00000000 004190ac,0000
:
42000000 records at (FA56EA00,0) 042000000 007d2b75 00000000 007d2b76,0000
43000000 records at ( 4CCB00,0) 043000000 00802665 00000100 00802666,0100
44000000 records at ( 642AC00,0) 044000000 00832156 00000000 00832157,0000
:
50000000 records.
--- Perl setpos --- C-rtl set pos
1 001000001 001000001
2 002000001 002000001
3 003000001 003000001
:
21 021000001 022000001
22 023000001
:
42 043000001
43 50328 044000001
:
50 50328
0, pos: 0002faf0 00000100 rfa: 0002faf1,0100 001000000
1, pos: 0005f5e1 00000000 rfa: 0005f5e2,0000 002000000
2, pos: 0008f0d1 00000100 rfa: 0008f0d2,0100 003000000
:
20, pos: 003e95ba 00000100 rfa: 003e95bb,0100 021000000
21, pos: 004190ab 00000000 rfa: 004190ac,0000 022000000
:
41, pos: 007d2b75 00000000 rfa: 007d2b76,0000 042000000
42, pos: 00802665 00000100 rfa: 00802666,0100 043000000
43, pos: 00832156 00000000 rfa: 00832157,0000 044000000
:
0, 001000001
1, 002000001
2, 003000001
:
21, 022000001
22, 023000001
:
42, 043000001
43, 044000001