This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 84fa54e1 Allow use from Ruby scripts
84fa54e1 is described below
commit 84fa54e10898ed3811ce481a8cc6273720060245
Author: Sebb <[email protected]>
AuthorDate: Fri Feb 10 10:57:32 2023 +0000
Allow use from Ruby scripts
---
tools/parsemail.rb | 112 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 71 insertions(+), 41 deletions(-)
diff --git a/tools/parsemail.rb b/tools/parsemail.rb
index 5f1c29b7..927243c2 100755
--- a/tools/parsemail.rb
+++ b/tools/parsemail.rb
@@ -25,55 +25,85 @@ require 'whimsy/asf/yaml'
require 'mail'
-MAIL_ROOT = '/srv/mail'
+module ParseMail
+ MAIL_ROOT = '/srv/mail'
-def parse_dir(maildir, yamlfile)
- data = Hash.new
-
- begin
- current = YamlFile.read(yamlfile)
- rescue Errno::ENOENT
- current = {}
+ def self.log(level, text)
+ out = $stdout
+ out = $stderr if level == :WARN
+ out.puts "#{Time.now} #{level}: #{text}"
end
-
- Dir.glob("#{maildir}/[0-9a-f][0-9a-f]*").each do |p|
+
+ def self.parse_dir(maildir, yamlfile)
+ data = Hash.new
+
+ begin
+ current = YamlFile.read(yamlfile)
+ rescue Errno::ENOENT
+ current = {}
+ end
+ log :INFO, "Current size #{current.size}"
+ entries = 0
+ dupes = 0
+ Dir.glob("#{maildir}/[0-9a-f][0-9a-f]*").each do |p|
+ entries += 1
name = File.basename(p)
- unless current[name]
- mail=Mail.read(p)
- entry = {
- Subject: mail.subject,
- Date: (mail['Date'].decoded rescue ''), # textual
- DateParsed: (mail.date.to_s rescue ''), # parsed
- From: (mail['From'].decoded rescue ''),
- To: (mail['To'].decoded rescue ''),
- Cc: (mail['Cc'].decoded rescue ''),
- # list of destination emails
- Emails: [(mail[:to].addresses.map(&:to_str) rescue []),(mail[:cc].addresses.map(&:to_str) rescue [])].flatten,
- MessageId: mail.message_id, # could be nil
- EnvelopeFrom: mail.envelope_from,
- EnvelopeDate: mail.envelope_date.to_s, # effectively the delivery date to the mailing list
- }
- data[name] = entry
+ if current[name]
+ dupes += 1
+ else
+ mail=Mail.read(p)
+ entry = {
+ Subject: mail.subject,
+ Date: (mail['Date'].decoded rescue ''), # textual
+ DateParsed: (mail.date.to_s rescue ''), # parsed
+ From: (mail['From'].decoded rescue ''),
+ To: (mail['To'].decoded rescue ''),
+ Cc: (mail['Cc'].decoded rescue ''),
+ # list of destination emails
+ Emails: [(mail[:to].addresses.map(&:to_str) rescue []),(mail[:cc].addresses.map(&:to_str) rescue [])].flatten,
+ MessageId: mail.message_id, # could be nil
+ EnvelopeFrom: mail.envelope_from,
+ EnvelopeDate: mail.envelope_date.to_s, # effectively the delivery date to the mailing list
+ }
+ data[name] = entry
end
- end
+ end
- # update the file with any new entries
- YamlFile.update(yamlfile) do |yaml|
- data.each do |k,v|
- unless yaml[k] # don't update existing entries
- yaml[k] = v
- end
+ log :INFO, "Found #{entries} files, with #{dupes} duplicates, giving #{data.size} new entries"
+
+ if data.size == 0
+ log :INFO, "No new entries found"
+ else
+ # update the file with any new entries
+ YamlFile.update(yamlfile) do |yaml|
+ data.each do |k,v|
+ unless yaml[k] # don't update existing entries (should rarely happen)
+ yaml[k] = v
+ end
+ end
+ yaml
end
- yaml
+ end
+ end
+
+ # indirection is to allow external code to require this file so it can be invoked
+ # without needing to shell out for a possibly expensive ruby!
+
+ def self.parse_main(args)
+ list = args.shift || 'board' # provide the list on the command line (e.g. board)
+ yyyymm = args.shift || Time.now.strftime('%Y%m')
+ yamlfile = args.shift || File.join(MAIL_ROOT, list, "#{yyyymm}.yaml") # where to find the YAML summary
+
+ maildir = File.join(MAIL_ROOT, list, yyyymm) # where to find the mail files
+ if Dir.exists? maildir
+ log :INFO, "Processing #{maildir}"
+ parse_dir(maildir, yamlfile)
+ else
+ log :WARN, "Could not find #{maildir}"
+ end
end
end
if __FILE__ == $0
- list = ARGV.shift || 'board' # provide the list on the command line (e.g. board)
- yyyymm = ARGV.shift || Time.now.strftime('%Y%m')
- yamlfile = ARGV.shift || File.join(MAIL_ROOT, list, "#{yyyymm}.yaml") # where to find the YAML summary
-
- maildir = File.join(MAIL_ROOT, list, yyyymm) # where to find the mail files
-
- parse_dir(maildir, yamlfile)
+ ParseMail.parse_main(ARGV)
end