On Sat, 20 Nov 1999, I wrote:

> On Fri, 19 Nov 1999, Klaus Weide wrote:
> 
> > Have you submitted your changes to Tom?  Have you tried to ask Subir
> > ([EMAIL PROTECTED]) to add a link?
> 
> I had sent a mail to Tom ([EMAIL PROTECTED]) but I got no answer.
> I'll send him again from now.

I got his answer. He wrote to me like this.

| I haven't been keeping up with lynx except to verify it still works every
| so often.
| 
| The program is GPL, but if there is a conflict I can make it a less
| restrictive opensource license.
| 
| The current version of the plain renderer is attached.  Feel free to
| forward it to lynx-dev or backfill the modifications.

I attach his new script to this mail.
--
Takeshi Hataguchi
E-mail: [EMAIL PROTECTED]
#!/usr/bin/gawk -f

# 10/16/98 Added generic &#NN; , split("" v.s. delete, nonest fix
# 2/4/98 Returns 1 if no tables have been rendered
# 1/23/98 fixed graphic vrule cr problem
# 11/25/97 Fixed unterminated table, added some local vars
# 4/4/97 rowspan fixes and cleanup
# spreads rowspan out evenly (not based on other cell height)
# valign still not handled

#look for CONFIGURE for changable parameters
#
#command line usage
#
#t2t [- options...]
#
#options: (see below at BEGIN for explanation)
#
#nonest     use references instead of nesting
#graphic    use line drawing chars instead of |+-
#rowlines   add interrow lines
#vertstack  stack nested tables vertically (experimental)
#nonesid    omit nested sides for a thinner image (experimental)
#nosides    omit sides for a thinner image (experimental)
#maxwid=NNN try to wrap text in columns wider than this
#extlines   also render text outside of tables
#isolate=NN only print the Nth table
#denest=N   ignore N outside levels of tables
#datarows   only print rows with some text

####################
#This is to expand for integration into something like lynx
#The return value is the amount of spaces the markup takes
function vislength(str) {
  return length(str);
}

####################
#append string to image with alignment and space padding
function alimg( span, col, row,       cnt1, wid ) {
  wid = span - 1 - vislength(imline);
  cnt1 = 0;
  while( cnt1 < span )
    wid += colwid[tnst,col+cnt1++];
  if( !wid )
    image[iline] = image[iline] imline vrule;
  else if( substr(align[tnst,row,col],0,6) == "CENTER" ) {
    if( wid  % 2 ) {
      imline = imline " ";
      wid--;
    }
    wid = wid / 2;
    image[iline] = image[iline] substr( spaces, 0, wid ) imline substr( spaces, 0, wid 
) vrule;
  }
  else if( substr(align[tnst,row,col],0,5) == "RIGHT" )
    image[iline] = image[iline] substr( spaces, 0, wid ) imline vrule;
  else
    image[iline] = image[iline] imline substr( spaces, 0, wid ) vrule;
}

####################
#write out line characters
function dohline( cl , cm, cr, rfl, row,        cnt,tcs1,col,cnt1 ) {

  col = 1;
  if( !rowlines || rsp[tnst,row,1] < 1 || iline == 0 )
    image[iline] = cl;
  else
    image[iline] = vrule;

  cnt = 0;
  while( col < mxcol ) {
    if( cnt || !rowlines || !rfl || rsp[tnst,row,col] < 1 || iline == 0 ) {
      image[iline] = image[iline] substr( hrule , 0, colwid[tnst,col] );
# corrects line image for colspans in row (top or bottom, not all middle)
      if( cnt ) {
        cnt--;
        image[iline] = image[iline] hrule1;
      }
      else if( tcs[tnst,row,col] <= 1 )
        image[iline] = image[iline] cm;
      else {
        cnt = tcs[tnst,row,col] - 2;
        image[iline] = image[iline] hrule1;
      }
      delete rsp[tnst,row-1,col];
      col++;
    }
    else {
      len = -1;
      cnt1 = rsp[tnst,row-1,col] + 1;
      while( cnt1 )
        len += rowhght[tnst, row - cnt1-- + 1] + rowlines;
      imline = ttext[tnst , row - rsp[tnst,row-1,col] , col , len ];
      delete ttext[tnst , row - rsp[tnst,row-1,col] , col , len ];

      tcs1 = tcs[tnst,row - rsp[tnst,row-1,col],col];
      delete rsp[tnst,row-1,col];
#compare below
      if( !tcs1 )
        tcs1++;
      alimg(tcs1,col, row);
      col += tcs1;
    }
  }
  image[iline] = image[iline] substr( hrule , 0, colwid[tnst,col] ) cr;

  if( graphic ) {
    gsub( "[^" hrule1 "]" cm , "&" cl , image[iline] );
    gsub( cm cl , cl , image[iline] );
    gsub( cm "[^" hrule1 "]" , cr "&" , image[iline] );
    gsub( cr cm , cr , image[iline] );
    gsub( cl "[^" hrule1 "]" , cl "&" , image[iline] );
    gsub( cl cl , vrule , image[iline] );
    gsub( vrule hrule1 , cl hrule1 , image[iline] );
    gsub( vrule cr , vrule , image[iline] );
  }

  if( !sides ) #strip sidebars?
    image[iline] = substr( image[iline] , 2 , length( image[iline] ) - 2 );

  iline++;
}

####################
#print a table into the image
function printtab(        tcs1,col,row,cnt)  {

  sides = 1; #all
  if( nonesid )
    sides = ( tnst == 1 ); #nonesided
  else if( nosides )
    sides = 0; #never

  while( !colwid[tnst,mxcol] && mxcol > 0 )
    mxcol--;

  row = 1;
  iline = 0;
  if( !tcs[tnst,1,1] ) #short or blank row
    tcs[tnst,1,1] = mxcol;
#top line
  dohline( boxtl , boxt , boxtr , 1, row);
#data rows
  while( row < crow ) {
#lines in row (valign not handled yet)
    hght = 0;
    while( hght < rowhght[tnst,row] ) {
      image[iline] = vrule;
#each col in line
      col = 1;
      while( col <= mxcol ) {
        if( rsp[tnst,row-1,col] > 0 ) {
          len = 0;
          cnt = rsp[tnst,row-1,col];
          while( cnt )
            len += rowhght[tnst, row - cnt--] + rowlines;
          imline = ttext[tnst , row - rsp[tnst,row-1,col] , col , hght + len ];
          delete ttext[tnst , row - rsp[tnst,row-1,col] , col , hght + len ];
          tcs1 = tcs[tnst,row - rsp[tnst,row-1,col],col];
          if( !rowlines && hght+1 == rowhght[tnst,row] )
            delete rsp[tnst,row-1,col];
        }
        else {
          imline = ttext[tnst,row,col,hght];
          delete ttext[tnst,row,col,hght];
          tcs1 = tcs[tnst,row,col];
          if( !tcs1 ) #short or blank row
            tcs1 = mxcol - col + 1;
        }
        if( !tcs1 )
          tcs1++;
        alimg(tcs1,col,row);
        col += tcs1;
      }
      hght++;
      if( !sides ) { #strip sidebars?
        image[iline] = substr( image[iline] , 2 , length(image[iline]) - 2 );
        sub(" $" , vrule , image[iline]);
        sub("^ " , vrule , image[iline]);
      }
      iline++;
    }
#bottom or interrow line
    if( row + 1 == crow )
      dohline( boxbl , boxb , boxbr , 0, row);
    else if( rowlines )
      dohline( boxlf , cross , boxrt , 1, row);
    row++;
  }

  iline--;
  row = 1;
  while( row < crow )
    delete rowhght[tnst,row++];
  col = 0;
  while( col <= mxcol )
    delete colwid[tnst,col++];

  delete colwid[tnst];
  delete rowhght[tnst];
  delete tcs[tnst];
  delete rsp[tnst];

}

####################
#remove quotes around strings, i.e. "123" becomes 123
function stripit( pval ) {
  if( substr(pval,0,1) == "\"" ) {
    pval = substr(pval,2,length(pval)-1);
    if( match( pval , "\"" ) )
      pval = substr(pval,0,RSTART-1);
  }
  else if( match(pval," ") )
    pval = substr(pval,0,RSTART-1);
  gsub( "\>" , "", pval );
  return pval;
}

####################
#begin table data or header entry
function startentry(        col1) {
#missing </td>
  if( tdflag )
    endentry();

  ccol++;
  csp = 1;

  while( rsp[tnst,crow-1,ccol] >= 1 )
    ccol++;

#grab alignment
  align[tnst,crow,ccol] = defalign[tnst];
  valign[tnst,crow,ccol] = defvalign[tnst];
  if( substr(toupper($1)" ",0,3) == "TH " )
    align[tnst,crow,ccol] = "CENTER";
  if( match(toupper($1), " ALIGN=") )
    align[tnst,crow,ccol] = toupper(substr($1,RSTART+7,6));
  if( match(toupper($1), " VALIGN=") )
    valign[tnst,crow,ccol] = toupper(substr($1,RSTART+8,6));

#grab colspan
  if( match(toupper($1), "COLSPAN=") )
    csp = int(stripit(substr($1,RSTART+8,5)));

#grab rowspan;
  rowsp = 1;
  if( match(toupper($1), "ROWSPAN=") )
    rowsp = int(stripit(substr($1,RSTART+8,5)));
  rowspan[ccol] = rowsp;
  while( rowsp ) {
    rowsp--;
    col1 = csp;
    tcs[tnst,crow+rowsp,ccol] = col1;
    while( col1-- )
      rsp[tnst,crow+rowsp-1,ccol+col1] = rowsp;
    ttext[tnst,crow+rowsp,ccol,0] = "";
  }
  linet = 0;
  tdflag = 1;
}

####################
#correct column widths for longest text
function fixcolsp(        col1) {
  if( vertstack ) {
    if( col > colwid[tnst,ccol] )
      colwid[tnst,ccol] = col;
  }
  else {
    if( !csp )
      csp = 1;

    col1 = 0;
    while( col1 < csp ) {
      if( !colwid[tnst,ccol+col1] )
        colwid[tnst,ccol+col1] = 1;
      col -= colwid[tnst,ccol+col1];
      col1++;
    }
    if( col > 0 ) {
      col = int( ( col + csp - 1 ) / csp ) ;
      col1 = 0;
      while( col1 < csp ) {
        colwid[tnst,ccol+col1] += col;
        col1++;
      }
    }
  }
}

####################
#remove spaces on either side
function stripsd (str) {
  while( sub(" $","",str));
  while( sub("^ ","",str));
  return(str);
}

####################
#end table data or header entry
function endentry(        lx, lastcol) {

  if( colwid[tnst,ccol] == 0 )
    colwid[tnst,ccol] = 1;

  lastcol = 0;
  lx = 0;
  while( lx <= linet ) {
#trim edge spaces
    imline = stripsd(ttext[tnst,crow,ccol,lx]);
    col = vislength( imline );
    if( !col ) {
      imline = imline " ";
      col = 1;
    }
    else
      rowdata=1;
    ttext[tnst,crow,ccol,lx] = imline;

#debug
#print tnst " row:" crow " col:" ccol " line:" lx " len:" col \
#  " cs:" tcs[tnst,crow,ccol] "=" csp,rsp[tnst,crow,ccol] " >" \
#  ttext[tnst,crow,ccol,lx] "<";

    if( col > lastcol ) {
      fixcolsp();
      lastcol = col;
    }
    lx++;
  }
#remove trailing blank lines
  while( lx > 1 && ttext[tnst,crow,ccol,lx-1] == " " )
    lx--;

  rowsp = rowspan[ccol];
  if( rowsp > 1 )
    lx = int( (lx + rowsp) / rowsp); #+rowsp-1

  while( rowsp-- )
    if( lx > rowhght[tnst,crow+rowsp] )
        rowhght[tnst,crow+rowsp] = lx - (rowsp && rowlines);

  while( csp > 1 ) {
    rowspan[ccol+1] = rowspan[ccol];
    tcs[tnst,crow,++ccol] = 0;
    csp--;
  }

  tdflag = 0;
  linet = 0;
  tralready = 0;
}

####################
#normalize structures in cases of omitted </td> or two few entries
function endrow () {
  if( tdflag )
    endentry();
  if( ccol > mxcol )
    mxcol = ccol;
  ccol = 0;
  if( !datarows || rowdata )
    crow++;
  if( !rowhght[tnst,crow] )
    rowhght[tnst,crow] = 1;
  linet = 0;
  tralready = 1;
}

####################
#begin table data row
function startrow () {
#omitted </tr>
  if( tdflag || !tralready )
    endrow();
  tralready = 0;
#valign?
  defalign[tnst] = "default";
  defvalign[tnst] = "default";
  if( match(toupper($1), " ALIGN=") )
    defalign[tnst] = toupper(substr($1,RSTART+7,6));
  if( match(toupper($1), " VALIGN=") )
    defvalign[tnst] = toupper(substr($1,RSTART+8,6));
  if( !rowhght[tnst,crow] )
    rowhght[tnst,crow] = 1;
  tcs[tnst,crow,1] = 0;
  rowdata=0;
}

####################
# print text outside tables
function doxline() {
  inrow = 0;
  while( inrow <= linet ) {
    imline = stripsd(ttext[0,1,0,inrow++]);
    if( length(imline) )
      print imline;
  }
  split("", ttext);
  linet = 0;
}

####################
function resetvars() {
  linet = 0;  crow = 1;  ccol = 0;
  mxcol = 1;  csp = 1;  tdflag = 0;
}

####################
function fillstr(str) {
  str = str str str str; #4
  str = str str str str; #16
  str = str str str str; #64
  str = str str str str; #256
  return str;
}

####################
function endtable() {
  if( !tralready )
    endrow();
  tralready = 0;
#generate image
  printtab();
  tnst--;
  tdflag = tdf[tnst];      linet = line[tnst];      crow = currow[tnst];
  ccol = curcol[tnst];      mxcol = maxcol[tnst];      csp = colsp[tnst];

#go past a nonblank line      
  imline = stripsd(ttext[tnst,crow,ccol,linet]);
  ttext[tnst,crow,ccol,linet] = imline;
  if( length( imline ) )
    linet++;
#print or copy the rendered subtable
  inrow = 0;
  if( nonest || tnst <= denest ) {
    if( !isolate || isolate == curtbl[tnst] ) {
      while( inrow <= iline )
        print image[inrow++];
      if( nonest )
        print "TABLE " tnst + 1 "." curtbl[tnst] "\n";
    }
    if( !nonest )
      linet = 0;
    system("");
    exitflag = 0;
  }
  else {
    while( inrow <= iline ) {
      ttext[tnst,crow,ccol,linet+inrow] = image[inrow];
      inrow++;
    }
    col = vislength( image[inrow-1] );
    csp = tcs[tnst,crow,ccol];
    fixcolsp();
    linet += inrow;
    if( linet > rowhght[tnst,crow] )
      rowhght[tnst,crow] = linet;
  }
  inrow = 0;
  if( tnst > denest && ( vertstack || !tdflag ) ) {
    endentry();
    endrow();
  }
}

####################
#main

#set some variables
BEGIN { 
  RS = "\<" ; 
  FS = "\>" ; 

  exitflag = 1;

  tnst = 0;
  tralready = 0;
  denest = 0;
  isolate = 0;
  resetvars();
  rowdata=0;
  datarows=0;

#csp=1

#CONFIGURE
  nonest = 0;

#CONFIGURE
#sides (vertical)
#default  nonesid nosides
#+------+ +----+ ----
#|+---+ | |--- | ---
#||A|B|C| |A|BC| A|BC
#|+---+ | |--- | ---
#+------+ +----+ ----
  nonesid = 0;
  nosides = 0;
#CONFIGURE - set to 1 for lines between rows
  rowlines = 0;
#CONFIGURE - set to 1 for lines outside of tables
  extlines = 0;
#CONFIGURE - set to 1 to stack all tables vertically
  vertstack = 0;
#CONFIGURE
# Ascii boxes
  boxtl = "+"; boxt = "+"; boxtr = "+"; boxbl = "+"; boxb = "+"; boxbr = "+";
  vrule = "|"; hrule = "-"; cross = "+"; boxlf = "+"; boxrt = "+";

#CONFIGURE
#split if column would be wider than
  maxwid = 80;
#split at the first space after backing up
  splitat = 10;

  while( ARGC ) {
    if( match( ARGV[ARGC] , "graphic" ) ) {
# PC Graphics characters (single);
      boxtl = "\332";  boxt = "\302";  boxtr = "\277";
      boxbl = "\300";  boxb = "\301";  boxbr = "\331";
      vrule = "\263";  hrule = "\304";
      boxlf = "\303";  boxrt = "\264"; cross = "\305";
      graphic = 1;
    }
    else if( match( ARGV[ARGC] , "vertstack" ) )
      vertstack = 1;
    else if( match( ARGV[ARGC] , "maxwid=" ) )
      maxwid = int( substr( ARGV[ARGC], RSTART + 7 , 3 ));
    else if( match( ARGV[ARGC] , "denest=" ) )
      denest = int( substr( ARGV[ARGC], RSTART + 7 , 3 ));
    else if( match( ARGV[ARGC] , "isolate=" ) )
      isolate = int( substr( ARGV[ARGC], RSTART + 8 , 3 ));
    else if( match( ARGV[ARGC] , "rowlines" ) )
      rowlines = 1;
    else if( match( ARGV[ARGC] , "nonesid" ) )
      nonesid = 1;
    else if( match( ARGV[ARGC] , "nosides" ) )
      nosides = 1;
    else if( match( ARGV[ARGC] , "extlines" ) )
      extlines = 1;
    else if( match( ARGV[ARGC] , "nonest" ) )
      nonest = 1;
    else if( match( ARGV[ARGC] , "datarows" ) )
      datarows = 1;

    ARGC--;
  }

  hrule1 = hrule;

  hrule = fillstr(hrule1);
  spaces = fillstr(" ");
  underl = fillstr("_");
}

#################### MAIN processing
{
#print "a:"tnst","crow","ccol","linet "<" $1 ">" $2 ":" ttext[tnst,crow,ccol,linet] 
":";
  imline = toupper($1) " ";

  if( substr(imline,0,6) == "TABLE " ) {

    curtbl[tnst]++;
    if( tnst > denest ) {
      if( !tdflag ) {
        startrow();
        startentry();
      }
      if( nonest )
        ttext[tnst,crow,ccol,linet] = "[Table " tnst+1 "." curtbl[tnst] "]";
    }
#text outside tables
    else if ( extlines )
      doxline();

    system("");

    line[tnst] = linet;    currow[tnst] = crow;    curcol[tnst] = ccol;
    maxcol[tnst] = mxcol;    colsp[tnst] = csp;    tdf[tnst] = tdflag;
    tnst++;
    resetvars();
    rowhght[tnst,1] = 1;
    tralready = 1;
  }

  if( tnst > denest ) {
#begin/end markers
    if( substr(imline,0,3) == "TD " \
        || substr(imline,0,3) == "TH " )
      startentry();
    else if( substr(imline,0,4) == "/TD " \
        || substr(imline,0,4) == "/TH " )
      endentry();
    else if( substr(imline,0,3) == "TR " )
      startrow();
    else if( substr(imline,0,4) == "/TR " )
      endrow();
#END OF TABLE
    else if( substr(imline,0,7) == "/TABLE " )
      endtable();
  }
  else if( substr(imline,0,7) == "/TABLE " && tnst )
    tnst--;
  else if( substr(imline,0,6) == "/HTML " )
    tnst = 0;

#something we want to format?
  if( tnst > denest || extlines ) {
#line breaking entries
    if(        imline == "BR " || \
        substr(imline,0,3) == "HR " || \
        substr(imline,0,3) == "LI " || \
        substr(imline,0,2) == "P " || \
        substr(imline,0,7) == "OPTION " || \
        substr(imline,0,8) == "/SELECT " \
        ) {

#ignore for blank lines
      imline = stripsd(ttext[tnst,crow,ccol,linet]);
      ttext[tnst,crow,ccol,linet] = imline;
      if( length(imline) ) {
        linet++;
        ttext[tnst,crow,ccol,linet] = "";
      }
    }

#extract ALT string
    if( match(substr(imline,0,4), "IMG ") ) {
      if( match(toupper($1)," ALT")) {
        name = substr($1,RSTART+4,length($1)-7);
        match( name , "=" );
        name = substr(name,RSTART+1,length(name)-1);
        sub(/^ */,"",name);
        if( substr(name,0,1) == "\"" ) {
          name = substr(name,2,length(name)-1);
          match(name,"\"");
          name = substr(name,0,RSTART-1);
        }
        else if( match(name," ") )
          name = substr(name,0,RSTART-1);
        gsub( "\>" , "", name );
        $2 = " [" name "]" $2;
      }
    }

#indicate options follow
    if( substr(imline,0,7) == "SELECT " )
      $2 = "[select]";

#form input fields
    if( substr(imline,0,6) == "INPUT " ) {
      size = 0;
      if( match(imline, "SIZE=") )
        size = int( stripit( substr($1 , RSTART + 5 , 64) ) );
      value = "*";
      if( match(imline, "VALUE=") )
        value = stripit( substr($1 , RSTART + 6 , 256) );
      if( length( value ) < size )
        value = value substr( underl , 0 , size - length(value) );
      if( !match(imline, "TYPE=.?HIDDEN") )
        ttext[tnst,crow,ccol,linet] \
          = ttext[tnst,crow,ccol,linet] "[" value "]";
    }

#fix character formats - convert specials into something normal
    if( NF > 1 && length($2) ) {
      gsub("\046amp;","\\&",$2);      gsub("\046#169;","(C)",$2);
      gsub("\046#0124;","|",$2);      gsub("\046#0146;","'",$2);
      gsub("\046#146;","'",$2);
      gsub("\046#162;","c",$2);       gsub("\046#160;"," ",$2);
      gsub("\046quot","\"",$2);       gsub("\046copy;","(C)",$2);
      gsub("\046reg;","(R)",$2);      gsub("\046nbsp;"," ",$2);
      gsub("\046nbsp"," ",$2);

      while( match($2,"\046#" ) ) {
        val = substr( $2, RSTART + 2, 10 );
        gsub( ";.*", "", val);
        val = val + 0;
        if( val < 127 && val > 31 )
          val = sprintf( "%c", val );
        else
          val = sprintf("[%02x]",val);
        sub("\046#[0-9]*;",val,$2);
      }

      gsub("\n"," ",$2);
      gsub("\r","",$2);      gsub("\t"," ",$2);
      gsub("  *"," ",$2);
      if( !tdflag )
        sub(" $","",$2);
      gsub("\221","`",$2);      gsub("\222","'",$2);
      gsub("\223","(",$2);      gsub("\224",")",$2);
    }

#append line
    if( NF > 1 && length($2) ) {
      if( vertstack && tnst > denest && !ccol )
        ccol++;

      imline = ttext[tnst,crow,ccol,linet] $2;
      ttext[tnst,crow,ccol,linet] = imline;
#Split long lines - not fully operational
      while( length( imline ) > (maxwid / (tnst+1)) * csp ) {
        gsub( "^ ", "", imline );
        temp = 1;
        stspl = maxwid * csp - splitat * temp;
        while( stspl > 0 ) {
          if( match( substr( imline, stspl , length( imline ) )," ") ) {
            ttext[tnst,crow,ccol,linet] = substr(imline, 0, stspl + RSTART - 1);
            ttext[tnst,crow,ccol,linet + 1] = substr(imline, stspl + RSTART - 1, 
length( imline ) );
            stspl = -1;
          }
          else
            stspl = maxwid * csp - splitat * ++temp;
        }
        linet++;
        imline = ttext[tnst,crow,ccol,linet];
      }
    }
  }
#print "B:"tnst","crow","ccol","linet "<" $1 ">" $2 ":" ttext[tnst,crow,ccol,linet] 
":";
}

END { 
  while( tnst > denest )
    endtable();
  if( extlines )
    doxline();
  exit exitflag;
}

Reply via email to