#!/usr/local/bin/ruby -w

# Tool to replace one chunk of text with another
#
# Usage: ruby fix-license.rb [-f] dir dir ...
# Without -f it acts as a dry run: no changes are made; only the list of files that would be
# modified is printed.
#

%w{find tmpdir}.each{ |f| require f }

# Scans Java source files and classifies each one by the kind of leading
# copyright/license comment it carries.
#
# NOTE: the -f flag is stored in @fix but nothing in this class acts on it,
# and NEW_STR is not referenced by any method here, so every run currently
# behaves like a dry run.
class Fix
  # delimiters of a C-style block comment
  C_BEGIN = '/*'
  C_END = '*/'

  # patterns that identify the old notices (case-insensitive)
  OLD_RE1 = /\s+\*\s+Copyright \(C\) 201[0-9]/i
  OLD_RE2 = /\s+\*\s+limitations under the License\./i
  OLD_RE3 = /\s+\*\s+Put your copyright and license info here\./i

  # replacement license header
  NEW_STR =
'/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */'.freeze

  # fix       -- value of the -f command line flag
  # file_ok   -- list of files where a recognised copyright notice was found
  # file_none -- list of files that contain no block comment at all
  # file_bad  -- list of files with a block comment but no matching notice
  #
  attr_reader :fix, :file_ok, :file_none, :file_bad

  # The original reader list named `file_list`, which was never backed by an
  # instance variable; keep the name working as a synonym for file_ok.
  alias_method :file_list, :file_ok

  # f -- true if -f was given on the command line
  def initialize(f)
    @fix = f
    @file_ok, @file_none, @file_bad = [], [], []
  end

  # Classify one file and append its path to file_ok, file_none or file_bad.
  #
  # path -- path of the file to examine
  #
  # Raises RuntimeError if both OLD_RE1 and OLD_RE2 match but at least one
  # match lies outside the first block comment.
  def process(path)
    # Read raw bytes: the patterns are pure ASCII, and a binary string cannot
    # raise "invalid byte sequence" on files that are not valid UTF-8.
    # All offsets below are therefore byte offsets.
    content = File.binread(path)

    # The comment end must be searched for *after* the opener; otherwise a
    # stray "*/" earlier in the file would be mistaken for the end.
    start = content.index(C_BEGIN)
    finish = start && content.index(C_END, start + C_BEGIN.size)

    if !(start && finish)    # no comment
      @file_none << path
      return
    end

    idx1 = content.index(OLD_RE1)
    idx2 = content.index(OLD_RE2)

    # some files have these variations:
    #
    # /*
    #  *  Copyright (c) 2012-2013 DataTorrent, Inc.
    #  *  All Rights Reserved.
    #  */
    #
    # /*
    #  * Copyright (c) 2014 DataTorrent, Inc. ALL Rights Reserved.
    #  */
    #
    # /**
    #  * Put your copyright and license info here.
    #  */
    #
    if !idx1 && !idx2
      # neither regular marker: only the placeholder comment qualifies
      idx3 = content.index(OLD_RE3)
      bucket = idx3 && strictly_inside?(start, idx3, finish) ? @file_ok : @file_bad
      bucket << path
      return
    end

    if !idx2
      # copyright line without the license tail (first two variations above)
      bucket = strictly_inside?(start, idx1, finish) ? @file_ok : @file_bad
      bucket << path
      return
    end

    if !idx1
      # license tail without a copyright line
      @file_bad << path
      return
    end

    range = (start + C_BEGIN.size)..finish
    if !(range.cover?(idx1) && range.cover?(idx2))        # should never happen
      # single format string: `a + b % args` would format only `b`
      raise format('Error: Matching strings found outside comment: start = %d, idx1 = %d, ' \
                   'idx2 = %d, finish = %d, path = %s', start, idx1, idx2, finish, path)
    end

    # file is good
    @file_ok << path
  end

  # Print the summary: fixable files first, then (only if non-empty) files
  # without comments and files without a matching copyright comment.
  # If there are no fixable files only a short notice is printed.
  def wrapup
    if @file_ok.empty?
      puts '=' * 60
      puts "No fixable Java files found"
      return
    end

    report("%d fixable files found:" % @file_ok.size, @file_ok)

    # files with no comments at all
    report("%d Java files with no comments" % @file_none.size, @file_none) if !@file_none.empty?

    # files with comments but no matching copyright
    if !@file_bad.empty?
      report("%d Java files with no matching copyright comment" % @file_bad.size, @file_bad)
    end
  end

  # main entry point
  #
  # Reads ARGV (an optional leading -f followed by directories), walks each
  # directory recursively, processes every regular file whose name ends in
  # ".java" and prints the summary.
  #
  # Raises RuntimeError if no directory is given or if an argument is
  # missing, not a directory, not readable or not searchable.
  def self.go
    fix = false
    if ARGV.first == "-f"
      fix = true
      ARGV.shift
    end

    # checked after -f is consumed so that a bare "-f" is rejected as well
    raise "Error: need list of directories" if ARGV.empty?

    fixer = Fix.new fix

    # check that each argument exists, is a directory and is readable
    dir_list = ARGV.map { |dir|
      d = dir.strip
      raise "Not found: '%s'"       % d if !File.exist?(d)
      raise "Not a directory: '%s'" % d if !File.directory?(d)
      raise "Not readable: '%s'"    % d if !File.readable?(d)
      raise "Not searchable: '%s'"  % d if !File.executable?(d)
      d
    }

    # recursively traverse each directory
    dir_list.each { |dir|
      Find.find(dir) do |path|
        if File.directory?(path)
          # skip directories starting with a dot, but never the starting
          # directory itself: "." and ".." also start with a dot
          Find.prune if path != dir && File.basename(path).start_with?('.')
        elsif path.end_with?(".java") && File.file?(path)
          fixer.process path
        end
      end
    }

    fixer.wrapup
  end  # go

  private

  # true if idx lies strictly between start and finish
  def strictly_inside?(start, idx, finish)
    start < idx && idx < finish
  end

  # print a separator line, a header line and a numbered list of files
  def report(header, files)
    puts '=' * 60
    puts header
    files.each_with_index { |file, i| puts "%d: %20s" % [i, file] }
  end

end  # Fix

# Run the tool only when this file is executed directly, so that loading the
# file from other code defines Fix without reading ARGV or scanning anything.
Fix.go if __FILE__ == $PROGRAM_NAME
