[whimsy] branch master updated: Spacing issues

sebb Wed, 13 Oct 2021 15:51:38 -0700

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git



The following commit(s) were added to refs/heads/master by this push:
     new 1122352  Spacing issues
1122352 is described below

commit 1122352fce471915fcf95d3634c8b9365b6068f1
Author: Sebb <[email protected]>
AuthorDate: Wed Oct 13 23:51:27 2021 +0100

    Spacing issues
---
 www/secretary/iclaparser.rb | 68 ++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/www/secretary/iclaparser.rb b/www/secretary/iclaparser.rb
index 973a6c5..3cf65ec 100644
--- a/www/secretary/iclaparser.rb
+++ b/www/secretary/iclaparser.rb
@@ -59,7 +59,7 @@ module ICLAParser
     end
 
     def set_text_font_and_size(*args)
-       @tfs=args
+      @tfs = args
     end
 
     def show_text(string)
@@ -69,23 +69,23 @@ module ICLAParser
     end
 
     def show_text_with_positioning(*args)
-        font = @fontdict[@tfs.first]
-        # args are Strings (in the current font encoding) interspersed with integer spacing adjustments; only want the strings
-        # We assume the positioning does not overlay characters so can be ignored
-        chars = []
-        args.flatten.each do |arg|
-          if arg.is_a?(String)
-            char = ICLAParser.string_to_utf8(arg, font)
-            chars << char
-          end
-        end
-        val = chars.join("").strip
-        len = val.length
-        # some PDFs have the individual text in this format so skip long lines which are unlikely to be user data
-        # Could perhaps have full list of expected text lines instead.
-        unless len == 0 or len > 50 or SKIP.include? val
-          @texts << val
+      font = @fontdict[@tfs.first]
+      # args are Strings (in the current font encoding) interspersed with integer spacing adjustments; only want the strings
+      # We assume the positioning does not overlay characters so can be ignored
+      chars = []
+      args.flatten.each do |arg|
+        if arg.is_a?(String)
+          char = ICLAParser.string_to_utf8(arg, font)
+          chars << char
         end
+      end
+      val = chars.join("").strip
+      len = val.length
+      # some PDFs have the individual text in this format so skip long lines which are unlikely to be user data
+      # Could perhaps have full list of expected text lines instead.
+      unless len == 0 or len > 50 or SKIP.include? val
+        @texts << val
+      end
     end
 
     def get_text
@@ -132,36 +132,36 @@ module ICLAParser
 
   # canonicalise the names found in the PDF
   def self.canon_field_name(pdfname)
-    NAME2FIELD[pdfname.gsub(' ','').downcase] || pdfname
+    NAME2FIELD[pdfname.gsub(' ', '').downcase] || pdfname
   end
 
   def self.encode(val)
-    if val.bytes[0..1] == [254,255]
-      val = val.encode('utf-8','utf-16').strip
+    if val.bytes[0..1] == [254, 255]
+      val = val.encode('utf-8', 'utf-16').strip
     else
       begin
         val = val.encode('utf-8').strip
       rescue Encoding::UndefinedConversionError
-        val = val.encode('utf-8','iso-8859-1').strip
+        val = val.encode('utf-8', 'iso-8859-1').strip
       end
     end
-    val.gsub("\x7F",'') # Not sure where these originate
+    val.gsub("\x7F", '') # Not sure where these originate
   end
 
   # parse the PDF
   def self.parse(path)
-    data=Hash.new
+    data = {}
     metadata = {}
     data[:_meta] = metadata
     metadata[:dataSource] = {} # have we found anything
     freetext = {} # gather the free text details
-    debug={}
+    debug = {}
     begin
       reader = PDF::Reader.new(path)
       %w(pdf_version info metadata page_count).each do |i|
         metadata[i] = reader.public_send(i)
       end
-      reader.objects.each do |k,v|
+      reader.objects.each do |_k, v|
         type = v[:Type] rescue nil
         subtype = v[:Subtype] rescue nil
 
@@ -176,7 +176,7 @@ module ICLAParser
               contents = v[:Contents]
               if contents and contents.length > 0 and contents != "\x14" # ignore "\x14" == ASCII DC4
                 # Entries may be duplicated, so use a hash to store them
-                id = rect.inspect+contents # if the rect and contents match, then they overwrite each other
+                id = rect.inspect + contents # if the rect and contents match, then they overwrite each other
                 freetext[id] = {Contents: contents.strip, x: rect[0], y: rect[1]}
                 metadata[:dataSource]['FreeText'] = true
               end
@@ -218,17 +218,17 @@ module ICLAParser
         # split into separate chunks if the difference in Y is more than a few points
         how_close = 3
         freetext.values. # no need for ids any more
-          sort_by{|e| -e[:y] }. # sort by Y desc
-          slice_when{|i,j| (i[:y]-j[:y]) > how_close}. # gather nearby Y values in case there are multiple entries on a line
+          sort_by {|e| -e[:y] }. # sort by Y desc
+          slice_when {|i, j| (i[:y] - j[:y]) > how_close}. # gather nearby Y values in case there are multiple entries on a line
           each do |k|
             data[:text] << k.
-              sort_by{|l| l[:x]}. # sort by X ascending
-              map{|v| v[:Contents]}.join(", ")
-        end
+            sort_by {|l| l[:x]}. # sort by X ascending
+            map {|v| v[:Contents]}.join(", ")
+          end
       end
       if metadata[:dataSource].size == 0 or ((data[:text].size rescue 0) <= 1 and data.size < 3) # No annotations found or not useful
         page1 = nil # cache for page 1
-        fontdict = Hash.new
+        fontdict = {}
         # Try looking for text sections instead
         receiver = Receiver.new(fontdict)
         reader.pages.each do |page|
@@ -256,7 +256,7 @@ module ICLAParser
               # split into headers
               form.slice_before(/^\s+.+:/).each do |lines|
                 # trim leading and trailing blanks and underscores and drop blank lines
-                line = lines.map{|l| l.sub(/^[ _]+/,'').sub(/[ _]+$/,'')}.select{|l| l.length > 0}.join(',')
+                line = lines.map {|l| l.sub(/^[ _]+/, '').sub(/[ _]+$/, '')}.select {|l| l.length > 0}.join(',')
                 case line
                   when /^\s*(?:\(optional\) )?(.+):\s+(.*)/
                     data[canon_field_name($1)] = $2 unless $2 == ',' or $2 == '' # empty line
@@ -270,7 +270,7 @@ module ICLAParser
         end
       end
     rescue Exception => e
-      data[:error]="Error processing #{path} => #{e.inspect} #{caller}"
+      data[:error] = "Error processing #{path} => #{e.inspect} #{caller}"
     end
 #    data[:debug] = debug
     # TODO attempt to classify data[:text] items?

[whimsy] branch master updated: Spacing issues

Reply via email to