[Codestriker-user] Syntax highlight (patch included)

Nikita Borodikhin Fri, 22 Jun 2007 10:05:03 -0700

Hello, all!

Syntax highlighting is probably the most needed feature of Codestriker,
as it improves code readability dramatically. Here is my implementation
of syntax highlighting using the Syntax::Highlight::Engine::Kate engine
(thanks to Kannan Goundan for pointing me to it).


I had to redesign the internal workflow of Codestriker::Http::Render.pm
a little, but in my day-to-day work it works just like before. The only
thing I haven't checked in real life is LXR linking, but I'm quite sure
it works too.

Codestriker is a great tool for collaborative work; many thanks to
the whole team!
--
Nikita V. Borodikhin, System Administrator NIKB-RIPN BNV7-RIPE
Registered Linux user #256562 with the Linux Counter

diff -pruN codestriker-1.9.3/bin/install.pl codestriker-1.9.3_hl/bin/install.pl
--- codestriker-1.9.3/bin/install.pl    2006-08-11 03:56:56.000000000 +0400
+++ codestriker-1.9.3_hl/bin/install.pl 2007-06-22 20:37:09.000000000 +0400
@@ -114,6 +114,11 @@ my $modules = [ 
         name => 'Authen::SASL', 
         version => '0',
         optional => 0
+    },
+    {
+       name => 'Syntax::Hightligh::Engine::Kate',
+       version => '0',
+       optional => 1
     }
 ];
 
diff -pruN codestriker-1.9.3/codestriker.conf 
codestriker-1.9.3_hl/codestriker.conf
--- codestriker-1.9.3/codestriker.conf  2007-03-07 06:38:01.000000000 +0300
+++ codestriker-1.9.3_hl/codestriker.conf       2007-06-22 20:32:25.000000000 
+0400
@@ -245,6 +245,15 @@ $COLOURED_MONO_MODE = 2;
 
 $default_topic_create_mode = $COLOURED_MODE;
 
+# Enable syntax highlighting.
+$use_syntax_highlight = 1;
+
+# Specify manual mapping between file extension and file type for highlighting.
+# File types are Syntax::Highlight::Engine::Kate ones.
+$syntax_extension_map = {
+       #".h" => "C++",
+};
+
 # The default line break viewing mode to use in the URL when viewing a
 # topic.  Can be either $LINE_BREAK_NORMAL_MODE or $LINE_BREAK_ASSIST_MODE.
 # Using $LINE_BREAK_ASSIST_MODE indicates that extra line breaks may be
diff -pruN codestriker-1.9.3/html/codestriker.css 
codestriker-1.9.3_hl/html/codestriker.css
--- codestriker-1.9.3/html/codestriker.css      2005-06-02 15:30:59.000000000 
+0400
+++ codestriker-1.9.3_hl/html/codestriker.css   2007-06-21 21:26:00.000000000 
+0400
@@ -144,3 +144,20 @@ A.tab {text-decoration: none}
 /* Styles for error field in comment posting tooltip. */
 span.hidden {display: none}
 span.error {display: inline; color: red}
+
+/* Styles for syntax highlighting */
+span.dsNormal {color: black}
+span.dsKeyword {color: blue}
+span.dsDataType {color: blue}
+span.dsDecVal {color: green}
+span.dsBaseN {color: green}
+span.dsFloat {color: green}
+span.dsChar {color: green}
+span.dsString {color: green; font-style: italic}
+span.dsComment {color: gray; font-style: italic}
+span.dsOthers {color: black}
+span.dsAlert {color: red; font-style: bold}
+span.dsFunction {color: black; font-style: bold}
+span.dsRegionMarker {color: black}
+span.dsError {color: red; font-style: bold}
+
diff -pruN codestriker-1.9.3/lib/Codestriker/Http/Render.pm 
codestriker-1.9.3_hl/lib/Codestriker/Http/Render.pm
--- codestriker-1.9.3/lib/Codestriker/Http/Render.pm    2006-06-14 
03:38:40.000000000 +0400
+++ codestriker-1.9.3_hl/lib/Codestriker/Http/Render.pm 2007-06-22 
20:33:37.000000000 +0400
@@ -14,6 +14,9 @@ use DBI;
 use CGI::Carp 'fatalsToBrowser';
 use HTML::Entities ();
 
+eval "use Syntax::Highlight::Engine::Kate;";
+my $highlight_available = !$@;
+
 # Colour to use when displaying the line number that a comment is being made
 # against.
 my $CONTEXT_COLOUR = "red";
@@ -83,6 +86,8 @@ sub new ([EMAIL PROTECTED]@[EMAIL PROTECTED]@[EMAIL 
PROTECTED]) {
     $self->{old_linenumber} = 1;
     $self->{new_linenumber} = 1;
 
+    $self->{use_highlight} = $Codestriker::use_syntax_highlight && 
$highlight_available;
+
     # Get the main entry to the database
     my $topic_obj = Codestriker::Model::Topic->new($self->{topic});
     # Check for readonly
@@ -114,6 +119,7 @@ sub new ([EMAIL PROTECTED]@[EMAIL PROTECTED]@[EMAIL 
PROTECTED]) {
     $self->{diff_current_filename} = "";
     $self->{diff_current_revision} = "";
     $self->{diff_current_repmatch} = 0;
+    $self->{diff_current_highlighter} = 0;
 
     # Check if the repository has an associated LXR mapping, and if so, 
     # setup a db connection and prepare a select statement.
@@ -188,62 +194,64 @@ sub lxr_ident($$) {
     }
 }
 
-# Parse the line and product the appropriate hyperlinks to LXR.
+# Parse the unescaped line and return array of begin position and length
+# of identifiers that can be converted to LXR links.
 # Currently, this is very Java/C/C++ centric, but it will do for now.
 sub lxr_data($$) {
     my ($self, $data) = @_;
 
+    my @positions = ();
+
     # Don't do anything if LXR is not enabled for this topic.
-    return $data if ! defined $self->{idhashref};
+    return @positions if ! defined $self->{idhashref};
 
     # If the line is just a comment, don't do any processing.  Note this code
     # isn't bullet-proof, but its good enough most of the time.
     $_ = $data;
-    return $data if (/^(\s|&nbsp;)*\/\// || /^(\s|&nbsp;){0,10}\*/ ||
-                    /^(\s|&nbsp;){0,10}\/\*/ ||
-                    /^(\s|&nbsp;)*\*\/(\s|&nbsp;)*$/);
+    return @positions if (/^\s*\/\// || /^\s{0,10}\*/ ||
+                    /^\s{0,10}\/\*/ ||
+                    /^\s*\*\/\s*$/);
     
     # Handle package Java statements.
-    if ($data =~ /^(package(\s|&nbsp;)+)([\w\.]+)(.*)$/) {
-       return $1 . $self->lxr_ident($3) . $4;
+    if ($data =~ /^(package\s+)([\w\.]+)(.*)$/) {
+       push(@positions, length($1), length($2));
+       return @positions;
     }
     
     # Handle Java import statements.
-    if ($data =~ /^(import(\s|&nbsp;)+)([\w\.]+)\.(\w+)((\s|&nbsp;)*)(.*)$/) {
-       return $1 . $self->lxr_ident($3) . "." . $self->lxr_ident($4) . "$5$7";
+    if ($data =~ /^(import\s+)([\w\.]+)\.(\w+)(\s*)(.*)$/) {
+       my $startpos = length($1);
+       push(@positions, $startpos, length($2));
+       $startpos += length($2) + 1;
+       push(@positions, $startpos, length($3));
+       return @positions;
     }
     
     # Handle #include statements.  Note, these aren't identifier lookups, but
     # need to be mapped to http://localhost.localdomain/lxr/xxx/yyy/incfile.h
     # Should include the current filename in the object for matching purposes.
 #    if (/^(\#\s*include\s+[\"<])(.*?)([\">].*)$/) {
-#      return $1 . $self->lxr_ident($2) . $3;
+#      push(@positions, length($1), length($2)
+#      return @positions;
 #    }
     
     # Break the string into potential identifiers, and look them up to see
     # if they can be hyperlinked to an LXR lookup.
-    my $idhashref = $self->{idhashref};
-    my @data_tokens = split /([A-Za-z][\w]+)/, $data;
-    my $newdata = "";
+    my @data_tokens = split /([_A-Za-z][\w]+)/, $data;
     my $in_comment = 0;
     my $eol_comment = 0;
+    my $startpos = 0;
     for (my $i = 0; $i <= $#data_tokens; $i++) {
        my $token = $data_tokens[$i];
-       if ($token =~ /^[A-Za-z]/) {
+       if ($token =~ /^[_A-Za-z]/) {
            if ($eol_comment || $in_comment) {
                # Currently in a comment, don't LXRify.
-               $newdata .= $token;
-           } elsif ($token eq "nbsp" || $token eq "quot" || $token eq "amp" ||
-                    $token eq "lt" || $token eq "gt") {
-               # HACK - ignore potential HTML entities.  This needs to be
-               # done in a smarter fashion later.
-               $newdata .= $token;
            } else {
-               $newdata .= $self->lxr_ident($token);
+               push(@positions, $startpos, length($token));
            }
        } else {
-           $newdata .= $token;
-           $token =~ s/(\s|&nbsp;)//g;
+           $token =~ s/(\s*)//;
+           $startpos += length($1);
            
            # Check if we are entering or exiting a comment.
            if ($token =~ /\/\//) {
@@ -254,9 +262,11 @@ sub lxr_data($$) {
                $in_comment = 1;
            }
        }
+
+       $startpos += length($token);
     }
 
-    return $newdata;
+    return @positions;
 }
 
 # Render a delta.  If the filename has changed since the last delta, output the
@@ -300,6 +310,7 @@ sub delta ($$$$$$$$$$) {
         # If it not a diff, show the entire delta (actually the file
         # contents) in a single column.
        $self->delta_file_header($filename, $revision, $repmatch);
+       $self->_create_highlighter($filename);
 
         print $query->Tr($query->td("&nbsp;"), $query->td("&nbsp;"),"\n");
        
@@ -313,7 +324,8 @@ sub delta ($$$$$$$$$$) {
            # Removed the delta text, where + is added to the start of each
            # line.  Also make sure the line is suitably escaped.
            $line =~ s/^\+//;
-           $line = HTML::Entities::encode($line);
+
+           $line = $self->_highlight_line($line);
 
            my $cell = $self->render_coloured_cell($line);
            my $cell_class =
@@ -326,6 +338,118 @@ sub delta ($$$$$$$$$$) {
     }
 }
 
+# Create highlighter object if possible and allowed
+sub _create_highlighter ($$) {
+    my ($self, $filename) = @_;
+
+    if ($self->{use_highlight})
+    {
+       my $hl;
+       eval "\$hl = new Syntax::Highlight::Engine::Kate();";
+
+       $hl->languageAutoSet($filename);
+
+       if ($filename =~ /.*(\.[^\.]*)$/) {
+           if (exists $Codestriker::syntax_extension_map->{$1}) {
+               $hl->language($Codestriker::syntax_extension_map->{$1});
+           }
+       }
+
+       $self->{diff_current_highlighter} = $hl;
+    }
+}
+
+# Parse line to array of pairs text-style.
+# Resulted text pieces should form the complete source line,
+# no character added, no removed
+sub _create_hltokens($$) {
+    my ($self, $line) = @_;
+
+    my @hldata;
+
+    if ($self->{diff_current_highlighter}) {
+       $self->{diff_current_highlighter}->reset();
+       @hldata = $self->{diff_current_highlighter}->highlight($line);
+    } else {
+       @hldata = [$line, "Normal"];
+    }
+
+    return @hldata;
+}
+
+# Escape entities in string and replace tabs to nbsps
+sub _htmlize ($$) {
+    my ($self, $data) = @_;
+
+    my $formatted = HTML::Entities::encode($data);
+
+    $formatted = tabadjust($self->{tabwidth}, $formatted, 1);
+    if ($self->{brmode} == $Codestriker::LINE_BREAK_ASSIST_MODE) {
+       $formatted =~ s/^(\s+)/my $sp='';for(my 
$i=0;$i<length($1);$i++){$sp.='&nbsp;'}$sp;/ge;
+    }
+    else {
+       $formatted =~ s/\s/&nbsp;/g;
+    }
+
+    return $formatted;
+}
+
+# Highlight line and place LXR links where appropriate
+sub _highlight_line ($$) {
+    my ($self, $line) = @_;
+
+    my @lxrposes = $self->lxr_data($line);
+
+    # Ensure we always have at least one pair.
+    push (@lxrposes, length($line) + 1000, 0);
+
+    my @hltokens = $self->_create_hltokens($line);
+
+    my $result = "";
+
+    # Position in original line.
+    my $startpos = 0;
+
+    # Next identifier position and length
+    my $identpos = shift @lxrposes;
+    my $identlen = shift @lxrposes;
+
+    while (@hltokens) {
+       my $text = shift @hltokens;
+       my $class = shift @hltokens;
+
+       $result .= "<span class=\"ds$class\">";
+
+       while ($startpos + length($text) > $identpos)
+       {
+           my $relpos = $identpos - $startpos;
+
+           # We believe each identifier is completely into token.
+           my $leader = substr($text, 0, $relpos);
+           my $identifier  = substr($text, $relpos, $identlen);
+
+           $result .= $self->_htmlize($leader);
+           $result .= $self->lxr_ident($identifier);
+
+           # Advance original line position.
+           $text = substr($text, $relpos + $identlen);
+           $startpos += $relpos + $identlen;
+
+           # Get next identifier pair.
+           $identpos = shift @lxrposes;
+           $identlen = shift @lxrposes;
+       }
+
+       # Process end of token.
+       $result .= $self->_htmlize($text);
+
+       $result .= "</span>";
+       $startpos += length($text);
+    }
+
+    return $result;
+}
+
 # Output the header for a series of deltas for a specific file.
 sub delta_file_header ($$$$) {
     my ($self, $filename, $revision, $repmatch) = @_;
@@ -533,6 +657,8 @@ sub delta_text ($$$$$$$$$$$) {
     my ($self, $filename, $filenumber, $revision, $old_linenumber,
        $new_linenumber, $text, $repmatch, $new, $link) = @_;
 
+    $self->_create_highlighter($filename);
+
     my $query = $self->{query};
 
     # Split up the lines, and display them, with the appropriate links.
@@ -565,24 +691,22 @@ sub display_coloured_data ($$$$) {
 
     my $query = $self->{query};
 
-    # Escape the data.
-    $data = HTML::Entities::encode($data);
-
     my $leftline = $self->{old_linenumber};
     my $rightline = $self->{new_linenumber};
     if ($data =~ /^\-(.*)$/) {
        # Line corresponds to something which has been removed.
-       add_old_change($1, $leftline);
+       add_old_change($self->_highlight_line($1), $leftline);
        $leftline++;
     } elsif ($data =~ /^\+(.*)$/) {
        # Line corresponds to something which has been removed.
-       add_new_change($1, $rightline);
+       add_new_change($self->_highlight_line($1), $rightline);
        $rightline++;
     } elsif ($data =~ /^\\/) {
        # A diff comment such as "No newline at end of file" - ignore it.
     } else {
        # Strip the first space off the diff for proper alignment.
        $data =~ s/^\s//;
+       $data = $self->_highlight_line($data);
 
        # Render the previous diff changes visually.
        $self->render_changes($filenumber, $link);
@@ -625,18 +749,6 @@ sub render_coloured_cell($$)
        return "&nbsp;";
     }
 
-    # Replace spaces and tabs with the appropriate number of &nbsp;'s.
-    $data = tabadjust($self->{tabwidth}, $data, 1);
-    if ($self->{brmode} == $Codestriker::LINE_BREAK_ASSIST_MODE) {
-       $data =~ s/^(\s+)/my $sp='';for(my 
$i=0;$i<length($1);$i++){$sp.='&nbsp;'}$sp;/ge;
-    }
-    else {
-    $data =~ s/\s/&nbsp;/g;
-    }
-
-    # Add LXR links to the output.
-    $data = $self->lxr_data($data);
-
     # Unconditionally add a &nbsp; at the start for better alignment.
     return "&nbsp;$data";
 }
@@ -1045,17 +1157,21 @@ sub display_single_filedata ($$$$$) {
     my $rightline = $self->{new_linenumber};
     my $max_line_length = $self->{max_line_length};
 
+    # Strip off all EOL symbols to display DOS-style files correctly.
+    $data =~ s/[\r\n]*$//;
+
     # Handling of either an old or new view.
     if ($data =~ /^\-(.*)$/o) {
        # A removed line.
-       $self->add_minus_monospace_line($1, $leftline++);
+       $self->add_minus_monospace_line($self->_highlight_line($1), 
$leftline++);
     } elsif ($data =~ /^\+(.*)$/o) {
        # An added line.
-       $self->add_plus_monospace_line($1, $rightline++);
+       $self->add_plus_monospace_line($self->_highlight_line($1), 
$rightline++);
     } else {
        # An unchanged line, output it and anything pending, and remove
        # the leading space for alignment reasons.
        $data =~ s/^\s//;
+       $data = $self->_highlight_line($data);
        $self->flush_monospaced_lines($filenumber, $max_line_length, $new,
                                      $link);
 
@@ -1078,9 +1194,6 @@ sub render_monospaced_line ($$$$$$$$) {
     my ($self, $filenumber, $linenumber, $new, $data, $link,
        $max_line_length, $class) = @_;
 
-    # Convert any identifier to their LXR links.
-    $data = $self->lxr_data(HTML::Entities::encode($data));
-
     my $prefix = "";
     my $digit_width = length($linenumber);
     my $max_digit_width = $self->{max_digit_width};

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

_______________________________________________
Codestriker-user mailing list
Codestriker-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/codestriker-user

[Codestriker-user] Syntax highlight (patch included)

Reply via email to