Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package rubygem-red-datasets for
openSUSE:Factory checked in at 2021-08-24 10:54:27
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/rubygem-red-datasets (Old)
and /work/SRC/openSUSE:Factory/.rubygem-red-datasets.new.1899 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "rubygem-red-datasets"
Tue Aug 24 10:54:27 2021 rev:2 rq:912502 version:0.1.4
Changes:
--------
--- /work/SRC/openSUSE:Factory/rubygem-red-datasets/rubygem-red-datasets.changes 2021-07-02 13:28:54.036093232 +0200
+++ /work/SRC/openSUSE:Factory/.rubygem-red-datasets.new.1899/rubygem-red-datasets.changes 2021-08-24 10:55:18.716286054 +0200
@@ -1,0 +2,21 @@
+Mon Jul 26 06:05:30 UTC 2021 - Stephan Kulow <[email protected]>
+
+updated to version 0.1.4
+ see installed news.md
+
+ ## 0.1.4 - 2021-07-13
+
+ ### Improvements
+
+ * `Datasets::SudachiSynonymDictionary`: Stopped depending on `LANG`.
+
+ ## 0.1.3 - 2021-07-09
+
+ ### Improvements
+
+ * `Datasets::SeabornData`: Added.
+
+ * `Datasets::SudachiSynonymDictionary`: Added.
+
+
+-------------------------------------------------------------------
Old:
----
red-datasets-0.1.2.gem
New:
----
red-datasets-0.1.4.gem
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ rubygem-red-datasets.spec ++++++
--- /var/tmp/diff_new_pack.uzijJo/_old 2021-08-24 10:55:19.364285195 +0200
+++ /var/tmp/diff_new_pack.uzijJo/_new 2021-08-24 10:55:19.368285190 +0200
@@ -24,7 +24,7 @@
#
Name: rubygem-red-datasets
-Version: 0.1.2
+Version: 0.1.4
Release: 0
%define mod_name red-datasets
%define mod_full_name %{mod_name}-%{version}
++++++ red-datasets-0.1.2.gem -> red-datasets-0.1.4.gem ++++++
Binary files old/checksums.yaml.gz and new/checksums.yaml.gz differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/doc/text/news.md new/doc/text/news.md
--- old/doc/text/news.md 2021-06-03 09:12:14.000000000 +0200
+++ new/doc/text/news.md 2021-07-13 10:48:32.000000000 +0200
@@ -1,5 +1,19 @@
# News
+## 0.1.4 - 2021-07-13
+
+### Improvements
+
+ * `Datasets::SudachiSynonymDictionary`: Stopped depending on `LANG`.
+
+## 0.1.3 - 2021-07-09
+
+### Improvements
+
+ * `Datasets::SeabornData`: Added.
+
+ * `Datasets::SudachiSynonymDictionary`: Added.
+
## 0.1.2 - 2021-06-03
### Improvements
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/cifar.rb new/lib/datasets/cifar.rb
--- old/lib/datasets/cifar.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets/cifar.rb 2021-07-13 10:48:32.000000000 +0200
@@ -1,4 +1,4 @@
-require_relative "tar_gz_readable"
+require_relative "tar-gz-readable"
require_relative "dataset"
module Datasets
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/cldr-plurals.rb
new/lib/datasets/cldr-plurals.rb
--- old/lib/datasets/cldr-plurals.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets/cldr-plurals.rb 2021-07-13 10:48:32.000000000 +0200
@@ -183,7 +183,7 @@
end
value = parse_value
if value.nil?
- raise Error.new("no value for #{operator}: #{@scanner.inspect}")
+ raise Error, "no value for #{operator}: #{@scanner.inspect}"
end
[operator, expr, value]
end
@@ -267,7 +267,7 @@
if operator
value = parse_value
if value.nil?
- raise Error.new("no value for #{operator}: #{@scanner.inspect}")
+ raise Error, "no value for #{operator}: #{@scanner.inspect}"
end
[operator, operand, value]
else
@@ -336,7 +336,7 @@
skip_whitespaces
# U+2026 HORIZONTAL ELLIPSIS
unless @scanner.scan(/\u2026|\.\.\./)
- raise "no ellipsis: #{@scanner.inspect}"
+ raise Error, "no ellipsis: #{@scanner.inspect}"
end
samples << :elipsis
end
@@ -362,7 +362,7 @@
skip_whitespaces
decimal = @scanner.scan(/[0-9]+/)
if decimal.nil?
- raise "no decimal: #{@scanner.inspect}"
+ raise Error, "no decimal: #{@scanner.inspect}"
end
value += Float("0.#{decimal}")
skip_whitespaces
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/mnist.rb new/lib/datasets/mnist.rb
--- old/lib/datasets/mnist.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets/mnist.rb 2021-07-13 10:48:32.000000000 +0200
@@ -65,7 +65,9 @@
n_bytes = n_uint32s * 4
mnist_magic_number = 2051
magic, n_images, n_rows, n_cols = f.read(n_bytes).unpack("N*")
- raise "This is not #{dataset_name} image file" if magic != mnist_magic_number
+ if magic != mnist_magic_number
+ raise Error, "This is not #{dataset_name} image file"
+ end
n_images.times do |i|
data = f.read(n_rows * n_cols)
label = labels[i]
@@ -99,7 +101,9 @@
n_bytes = n_uint32s * 2
mnist_magic_number = 2049
magic, n_labels = f.read(n_bytes).unpack('N2')
- raise "This is not #{dataset_name} label file" if magic != mnist_magic_number
+ if magic != mnist_magic_number
+ raise Error, "This is not #{dataset_name} label file"
+ end
f.read(n_labels).unpack('C*')
end
end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/rdatasets.rb
new/lib/datasets/rdatasets.rb
--- old/lib/datasets/rdatasets.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets/rdatasets.rb 2021-07-13 10:48:32.000000000 +0200
@@ -1,5 +1,5 @@
require_relative "dataset"
-require_relative "tar_gz_readable"
+require_relative "tar-gz-readable"
module Datasets
class RdatasetsList < Dataset
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/seaborn-data.rb
new/lib/datasets/seaborn-data.rb
--- old/lib/datasets/seaborn-data.rb 1970-01-01 01:00:00.000000000 +0100
+++ new/lib/datasets/seaborn-data.rb 2021-07-13 10:48:32.000000000 +0200
@@ -0,0 +1,49 @@
+module Datasets
+ class SeabornData < Dataset
+ URL_FORMAT = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/%{name}.csv".freeze
+
+ def initialize(name)
+ super()
+ @metadata.id = "seaborn-data-#{name}"
+ @metadata.name = "SeabornData: #{name}"
+ @metadata.url = URL_FORMAT % {name: name}
+
+ @data_path = cache_dir_path + (name + ".csv")
+ @name = name
+ end
+
+ def each(&block)
+ return to_enum(__method__) unless block_given?
+
+ download(@data_path, @metadata.url) unless @data_path.exist?
+ CSV.open(@data_path, headers: :first_row, converters: :all) do |csv|
+ csv.each do |row|
+ record = prepare_record(row)
+ yield record
+ end
+ end
+ end
+
+ private
+ def prepare_record(csv_row)
+ record = csv_row.to_h
+ record.transform_keys!(&:to_sym)
+
+ # Perform the same preprocessing as seaborn's load_dataset function
+ preprocessor = :"preprocess_#{@name}_record"
+ __send__(preprocessor, record) if respond_to?(preprocessor, true)
+
+ record
+ end
+
+ # The same preprocessing as seaborn.load_dataset
+ def preprocess_flights_record(record)
+ record[:month] &&= record[:month][0,3]
+ end
+
+ # The same preprocessing as seaborn.load_dataset
+ def preprocess_penguins_record(record)
+ record[:sex] &&= record[:sex].capitalize
+ end
+ end
+end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/sudachi-synonym-dictionary.rb
new/lib/datasets/sudachi-synonym-dictionary.rb
--- old/lib/datasets/sudachi-synonym-dictionary.rb 1970-01-01 01:00:00.000000000 +0100
+++ new/lib/datasets/sudachi-synonym-dictionary.rb 2021-07-13 10:48:32.000000000 +0200
@@ -0,0 +1,171 @@
+require "csv"
+
+require_relative "dataset"
+
+module Datasets
+ class SudachiSynonymDictionary < Dataset
+ class Synonym < Struct.new(:group_id,
+ :is_noun,
+ :expansion_type,
+ :lexeme_id,
+ :form_type,
+ :acronym_type,
+ :variant_type,
+ :categories,
+ :notation)
+ alias_method :noun?, :is_noun
+ end
+
+ def initialize
+ super()
+ @metadata.id = "sudachi-synonym-dictionary"
+ @metadata.name = "Sudachi synonym dictionary"
+ @metadata.url = "https://github.com/WorksApplications/SudachiDict/blob/develop/docs/synonyms.md"
+ @metadata.licenses = [
+ "Apache-2.0",
+ ]
+ @metadata.description = lambda do
+ download_description
+ end
+ end
+
+ def each
+ return to_enum(__method__) unless block_given?
+
+ lexeme_id_context = {}
+ open_data do |csv|
+ csv.each do |row|
+ group_id = row[0]
+ if group_id != lexeme_id_context[:group_id]
+ lexeme_id_context[:group_id] = group_id
+ lexeme_id_context[:counter] = 0
+ end
+ is_noun = (row[1] == "1")
+ expansion_type = normalize_expansion_type(row[2])
+ lexeme_id = normalize_lexeme_id(row[3], lexeme_id_context)
+ form_type = normalize_form_type(row[4])
+ acronym_type = normalize_acronym_type(row[5])
+ variant_type = normalize_variant_type(row[6])
+ categories = normalize_categories(row[7])
+ notation = row[8]
+ synonym = Synonym.new(group_id,
+ is_noun,
+ expansion_type,
+ lexeme_id,
+ form_type,
+ acronym_type,
+ variant_type,
+ categories,
+ notation)
+ yield(synonym)
+ end
+ end
+ end
+
+ private
+ def open_data
+ data_path = cache_dir_path + "synonyms.txt"
+ unless data_path.exist?
+ data_url = "https://raw.githubusercontent.com/WorksApplications/SudachiDict/develop/src/main/text/synonyms.txt"
+ download(data_path, data_url)
+ end
+ CSV.open(data_path,
+ encoding: "UTF-8",
+ skip_blanks: true) do |csv|
+ yield(csv)
+ end
+ end
+
+ def download_description
+ description_path = cache_dir_path + "synonyms.md"
+ unless description_path.exist?
+ description_url = "https://raw.githubusercontent.com/WorksApplications/SudachiDict/develop/docs/synonyms.md"
+ download(description_path, description_url)
+ end
+ description_path.read
+ end
+
+ def normalize_expansion_type(type)
+ case type
+ when "0", ""
+ :always
+ when "1"
+ :expanded
+ when "2"
+ :never
+ else
+ raise Error, "unknown expansion type: #{type.inspect}"
+ end
+ end
+
+ def normalize_lexeme_id(id, context)
+ case id
+ when ""
+ lexeme_id_context[:counter] += 1
+ lexeme_id_context[:counter]
+ else
+ # Use only the first lexeme ID.
+ # Example:
+ # 000116,1,0,1/2,0,2,0,(IT/??????),???????????????,,
+ # 000116,1,0,1/2,0,2,0,(IT/??????),?????????,,
+ Integer(id.split("/").first, 10)
+ end
+ end
+
+ def normalize_form_type(type)
+ case type
+ when "0", ""
+ :typical
+ when "1"
+ :translation
+ when "2"
+ :alias
+ when "3"
+ :old_name
+ when "4"
+ :misnomer
+ else
+ raise Error, "unknown form type: #{type.inspect}"
+ end
+ end
+
+ def normalize_acronym_type(type)
+ case type
+ when "0", ""
+ :typical
+ when "1"
+ :alphabet
+ when "2"
+ :others
+ else
+ raise Error, "unknown acronym type: #{type.inspect}"
+ end
+ end
+
+ def normalize_variant_type(type)
+ case type
+ when "0", ""
+ :typical
+ when "1"
+ :alphabet
+ when "2"
+ :general
+ when "3"
+ :misspelled
+ else
+ raise Error, "unknown variant type: #{type.inspect}"
+ end
+ end
+
+ def normalize_categories(categories)
+ case categories
+ when ""
+ nil
+ when /\A\((.*)\)\z/
+ $1.split("/")
+ else
+ raise Error, "invalid categories: #{categories.inspect}"
+ end
+ end
+ end
+end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/tar-gz-readable.rb
new/lib/datasets/tar-gz-readable.rb
--- old/lib/datasets/tar-gz-readable.rb 1970-01-01 01:00:00.000000000 +0100
+++ new/lib/datasets/tar-gz-readable.rb 2021-07-13 10:48:32.000000000 +0200
@@ -0,0 +1,14 @@
+require "rubygems/package"
+require "zlib"
+
+module Datasets
+ module TarGzReadable
+ def open_tar_gz(data_path)
+ Zlib::GzipReader.open(data_path) do |f|
+ Gem::Package::TarReader.new(f) do |tar|
+ yield(tar)
+ end
+ end
+ end
+ end
+end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/tar_gz_readable.rb
new/lib/datasets/tar_gz_readable.rb
--- old/lib/datasets/tar_gz_readable.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets/tar_gz_readable.rb 1970-01-01 01:00:00.000000000 +0100
@@ -1,14 +0,0 @@
-require "rubygems/package"
-require "zlib"
-
-module Datasets
- module TarGzReadable
- def open_tar_gz(data_path)
- Zlib::GzipReader.open(data_path) do |f|
- Gem::Package::TarReader.new(f) do |tar|
- yield(tar)
- end
- end
- end
- end
-end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets/version.rb new/lib/datasets/version.rb
--- old/lib/datasets/version.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets/version.rb 2021-07-13 10:48:32.000000000 +0200
@@ -1,3 +1,3 @@
module Datasets
- VERSION = "0.1.2"
+ VERSION = "0.1.4"
end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/lib/datasets.rb new/lib/datasets.rb
--- old/lib/datasets.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/lib/datasets.rb 2021-07-13 10:48:32.000000000 +0200
@@ -16,5 +16,7 @@
require_relative "datasets/penn-treebank"
require_relative "datasets/postal-code-japan"
require_relative "datasets/rdatasets"
+require_relative "datasets/seaborn-data"
+require_relative "datasets/sudachi-synonym-dictionary"
require_relative "datasets/wikipedia"
require_relative "datasets/wine"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/metadata new/metadata
--- old/metadata 2021-06-03 09:12:14.000000000 +0200
+++ new/metadata 2021-07-13 10:48:32.000000000 +0200
@@ -1,7 +1,7 @@
--- !ruby/object:Gem::Specification
name: red-datasets
version: !ruby/object:Gem::Version
- version: 0.1.2
+ version: 0.1.4
platform: ruby
authors:
- tomisuker
@@ -9,7 +9,7 @@
autorequire:
bindir: bin
cert_chain: []
-date: 2021-06-03 00:00:00.000000000 Z
+date: 2021-07-13 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
name: csv
@@ -162,8 +162,10 @@
- lib/datasets/penn-treebank.rb
- lib/datasets/postal-code-japan.rb
- lib/datasets/rdatasets.rb
+- lib/datasets/seaborn-data.rb
+- lib/datasets/sudachi-synonym-dictionary.rb
- lib/datasets/table.rb
-- lib/datasets/tar_gz_readable.rb
+- lib/datasets/tar-gz-readable.rb
- lib/datasets/version.rb
- lib/datasets/wikipedia.rb
- lib/datasets/wine.rb
@@ -189,6 +191,8 @@
- test/test-penn-treebank.rb
- test/test-postal-code-japan.rb
- test/test-rdatasets.rb
+- test/test-seaborn-data.rb
+- test/test-sudachi-synonym-dictionary.rb
- test/test-table.rb
- test/test-wikipedia.rb
- test/test-wine.rb
@@ -237,6 +241,8 @@
- test/test-penn-treebank.rb
- test/test-postal-code-japan.rb
- test/test-rdatasets.rb
+- test/test-seaborn-data.rb
+- test/test-sudachi-synonym-dictionary.rb
- test/test-table.rb
- test/test-wikipedia.rb
- test/test-wine.rb
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/test/test-cldr-plurals.rb
new/test/test-cldr-plurals.rb
--- old/test/test-cldr-plurals.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/test/test-cldr-plurals.rb 2021-07-13 10:48:32.000000000 +0200
@@ -14,7 +14,7 @@
test("#each") do
locales = @dataset.each.to_a
assert_equal([
- 215,
+ 218,
locale("bm",
[
rule("other",
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/test/test-rdatasets.rb new/test/test-rdatasets.rb
--- old/test/test-rdatasets.rb 2021-06-03 09:12:14.000000000 +0200
+++ new/test/test-rdatasets.rb 2021-07-13 10:48:32.000000000 +0200
@@ -48,7 +48,7 @@
test("without package_name") do
records = @dataset.each.to_a
assert_equal([
- 1478,
+ 1714,
{
package: "AER",
dataset: "Affairs",
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/test/test-seaborn-data.rb
new/test/test-seaborn-data.rb
--- old/test/test-seaborn-data.rb 1970-01-01 01:00:00.000000000 +0100
+++ new/test/test-seaborn-data.rb 2021-07-13 10:48:32.000000000 +0200
@@ -0,0 +1,97 @@
+class SeabornDataTest < Test::Unit::TestCase
+ sub_test_case("fmri") do
+ def setup
+ @dataset = Datasets::SeabornData.new("fmri")
+ end
+
+ def test_each
+ records = @dataset.each.to_a
+ assert_equal([
+ 1064,
+ {
+ subject: "s5",
+ timepoint: 14,
+ event: "stim",
+ region: "parietal",
+ signal: -0.0808829319505
+ },
+ {
+ subject: "s0",
+ timepoint: 0,
+ event: "cue",
+ region: "parietal",
+ signal: -0.00689923478092
+ }
+ ],
+ [
+ records.size,
+ records[1].to_h,
+ records[-1].to_h
+ ])
+ end
+ end
+
+ sub_test_case("flights") do
+ def setup
+ @dataset = Datasets::SeabornData.new("flights")
+ end
+
+ def test_each
+ records = @dataset.each.to_a
+ assert_equal([
+ 144,
+ {
+ year: 1949,
+ month: "Feb",
+ passengers: 118
+ },
+ {
+ year: 1960,
+ month: "Dec",
+ passengers: 432
+ }
+ ],
+ [
+ records.size,
+ records[1].to_h,
+ records[-1].to_h
+ ])
+ end
+ end
+
+ sub_test_case("penguins") do
+ def setup
+ @dataset = Datasets::SeabornData.new("penguins")
+ end
+
+ def test_each
+ records = @dataset.each.to_a
+ assert_equal([
+ 344,
+ {
+ species: "Adelie",
+ island: "Torgersen",
+ bill_length_mm: 39.5,
+ bill_depth_mm: 17.4,
+ flipper_length_mm: 186,
+ body_mass_g: 3800,
+ sex: "Female"
+ },
+ {
+ species: "Gentoo",
+ island: "Biscoe",
+ bill_length_mm: 49.9,
+ bill_depth_mm: 16.1,
+ flipper_length_mm: 213,
+ body_mass_g: 5400,
+ sex: "Male"
+ }
+ ],
+ [
+ records.size,
+ records[1].to_h,
+ records[-1].to_h
+ ])
+ end
+ end
+end
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/test/test-sudachi-synonym-dictionary.rb
new/test/test-sudachi-synonym-dictionary.rb
--- old/test/test-sudachi-synonym-dictionary.rb 1970-01-01 01:00:00.000000000 +0100
+++ new/test/test-sudachi-synonym-dictionary.rb 2021-07-13 10:48:32.000000000 +0200
@@ -0,0 +1,48 @@
+class SudachiSynonymDictionaryTest < Test::Unit::TestCase
+ def setup
+ @dataset = Datasets::SudachiSynonymDictionary.new
+ end
+
+ test('#each') do
+ records = @dataset.each.to_a
+ assert_equal([
+ 61335,
+ {
+ group_id: "000001",
+ is_noun: true,
+ expansion_type: :always,
+ lexeme_id: 1,
+ form_type: :typical,
+ acronym_type: :typical,
+ variant_type: :typical,
+ categories: [],
+ notation: "曖昧",
+ },
+ {
+ group_id: "023705",
+ is_noun: true,
+ expansion_type: :always,
+ lexeme_id: 1,
+ form_type: :typical,
+ acronym_type: :alphabet,
+ variant_type: :typical,
+ categories: ["単位"],
+ notation: "GB",
+ },
+ ],
+ [
+ records.size,
+ records[0].to_h,
+ records[-1].to_h,
+ ])
+ end
+
+ sub_test_case('#metadata') do
+ test('#description') do
+ description = @dataset.metadata.description
+ assert do
+ description.start_with?('# Sudachi 同義語辞書')
+ end
+ end
+ end
+end