commit perl-Lingua-EN-Sentence for openSUSE:Factory

Source-Sync Wed, 12 Jul 2023 09:44:33 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package perl-Lingua-EN-Sentence for 
openSUSE:Factory checked in at 2023-07-12 17:26:52
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/perl-Lingua-EN-Sentence (Old)
 and      /work/SRC/openSUSE:Factory/.perl-Lingua-EN-Sentence.new.8922 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "perl-Lingua-EN-Sentence"

Wed Jul 12 17:26:52 2023 rev:17 rq:1098155 version:0.34

Changes:
--------
--- 
/work/SRC/openSUSE:Factory/perl-Lingua-EN-Sentence/perl-Lingua-EN-Sentence.changes
  2022-07-13 15:35:05.333377829 +0200
+++ 
/work/SRC/openSUSE:Factory/.perl-Lingua-EN-Sentence.new.8922/perl-Lingua-EN-Sentence.changes
        2023-07-12 17:27:11.142436451 +0200
@@ -1,0 +2,6 @@
+Wed Jun 21 03:07:10 UTC 2023 - Tina Müller <[email protected]>
+
+- updated to 0.34
+   see /usr/share/doc/packages/perl-Lingua-EN-Sentence/Changes
+
+-------------------------------------------------------------------

Old:
----
  Lingua-EN-Sentence-0.33.tar.gz

New:
----
  Lingua-EN-Sentence-0.34.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ perl-Lingua-EN-Sentence.spec ++++++
--- /var/tmp/diff_new_pack.3R7GNd/_old  2023-07-12 17:27:11.922440998 +0200
+++ /var/tmp/diff_new_pack.3R7GNd/_new  2023-07-12 17:27:11.926441022 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package perl-Lingua-EN-Sentence
 #
-# Copyright (c) 2022 SUSE LLC
+# Copyright (c) 2023 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,7 +18,7 @@
 
 %define cpan_name Lingua-EN-Sentence
 Name:           perl-Lingua-EN-Sentence
-Version:        0.33
+Version:        0.34
 Release:        0
 License:        Artistic-1.0 OR GPL-1.0-or-later
 Summary:        Split text into sentences
@@ -50,14 +50,14 @@
 %autosetup  -n %{cpan_name}-%{version}
 
 %build
-perl Build.PL installdirs=vendor
-./Build build flags=%{?_smp_mflags}
+perl Build.PL --installdirs=vendor
+./Build build --flags=%{?_smp_mflags}
 
 %check
 ./Build test
 
 %install
-./Build install destdir=%{buildroot} create_packlist=0
+./Build install --destdir=%{buildroot} --create_packlist=0
 %perl_gen_filelist
 
 %files -f %{name}.files

++++++ Lingua-EN-Sentence-0.33.tar.gz -> Lingua-EN-Sentence-0.34.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/Lingua-EN-Sentence-0.33/Changes 
new/Lingua-EN-Sentence-0.34/Changes
--- old/Lingua-EN-Sentence-0.33/Changes 2022-07-05 02:58:57.000000000 +0200
+++ new/Lingua-EN-Sentence-0.34/Changes 2023-06-20 02:47:11.000000000 +0200
@@ -105,7 +105,7 @@
     Declared min version of Perl. Fix for RT bug #124686 
        
 0.32 July 2022
-       fixed bug causing abbreviation followed by '(' to break sentnece, 
reported in github
+       fixed bug causing abbreviation followed by '(' to break sentence, 
reported in github
        dot following an abbreviation now explicitly marked up
        added more acronyms
        improved documentation
@@ -113,4 +113,7 @@
        added verbose moe for debugging
        
 0.33 July 05 2022
-       fixed version numbersin Build.PL and Makefile.PL
+       fixed version numbers in Build.PL and Makefile.PL
+       
+0.34 June 20 2023
+       fixed version numbers in Build.PL and Makefile.PL
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/Lingua-EN-Sentence-0.33/META.json 
new/Lingua-EN-Sentence-0.34/META.json
--- old/Lingua-EN-Sentence-0.33/META.json       2022-07-05 03:41:47.000000000 
+0200
+++ new/Lingua-EN-Sentence-0.34/META.json       2023-06-20 03:56:25.000000000 
+0200
@@ -34,7 +34,7 @@
    "provides" : {
       "Lingua::EN::Sentence" : {
          "file" : "lib/Lingua/EN/Sentence.pm",
-         "version" : "0.33"
+         "version" : "0.34"
       }
    },
    "release_status" : "stable",
@@ -46,6 +46,6 @@
          "url" : "https://github.com/kimryan/Lingua-EN-Sentence"
       }
    },
-   "version" : "0.33",
+   "version" : "0.34",
    "x_serialization_backend" : "JSON::PP version 4.04"
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/Lingua-EN-Sentence-0.33/META.yml 
new/Lingua-EN-Sentence-0.34/META.yml
--- old/Lingua-EN-Sentence-0.33/META.yml        2022-07-05 03:41:47.000000000 
+0200
+++ new/Lingua-EN-Sentence-0.34/META.yml        2023-06-20 03:56:25.000000000 
+0200
@@ -16,12 +16,12 @@
 provides:
   Lingua::EN::Sentence:
     file: lib/Lingua/EN/Sentence.pm
-    version: '0.33'
+    version: '0.34'
 requires:
   perl: v5.10.0
   warnings: '1.06'
 resources:
   license: http://dev.perl.org/licenses/
   repository: https://github.com/kimryan/Lingua-EN-Sentence
-version: '0.33'
+version: '0.34'
 x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/Lingua-EN-Sentence-0.33/README 
new/Lingua-EN-Sentence-0.34/README
--- old/Lingua-EN-Sentence-0.33/README  2016-08-09 03:22:20.000000000 +0200
+++ new/Lingua-EN-Sentence-0.34/README  2022-07-05 08:39:19.000000000 +0200
@@ -6,16 +6,33 @@
 SYNOPSIS
 
        use Lingua::EN::Sentence qw( get_sentences add_acronyms );
-
+       
        add_acronyms('lt','gen');               ## adding support for 'Lt. Gen.'
-       my $sentences=get_sentences($text);     ## Get the sentences.
-       foreach my $sentence (@$sentences) {
-               ## do something with $sentence
+       my $text = q{
+       A sentence usually ends with a dot, exclamation or question mark 
optionally followed by a space!
+       A string followed by 2 carriage returns denotes a sentence, even though 
it doesn't end in a dot
+       
+       Dots after single letters such as U.S.A. or in numbers like -12.34 will 
not cause a split
+       as well as common abbreviations such as Dr. I. Smith, Ms. A.B. Jones, 
Apr. Calif. Esq.
+       and (some text) ellipsis such as ... or . . are ignored.
+       Some valid cases canot be deteected, such as the answer is X. It cannot 
easily be
+       differentiated from the single letter-dot sequence to abbreviate a 
person's given name.
+       Numbered points within a sentence will not cause a split 1. Like this 
one.
+       See the code for all the rules that apply.
+       This string has 7 sentences.
+       };
+       
+       my $sentences=get_sentences($text);     # Get the sentences.
+       foreach my $sent (@$sentences)
+       {
+               $i++;
+               print("SENTENCE $i:$sent\n");
        }
-    
+
+
 DESCRIPTION
 
-The Lingua::EN::Sentence module contains the function get_sentences, which
+The C<Lingua::EN::Sentence> module contains the function get_sentences, which
 splits text into its constituent sentences, based on a regular expression and a
 list of abbreviations (built in and given).
 
@@ -23,6 +40,8 @@
 segmentations. But some of them are already integrated into this code and are
 being taken care of. Still, if you see that there are words causing the
 get_sentences function to fail, you can add those to the module, so it notices 
them.
+Note that abbreviations are case sensitive, so 'Mrs.' is recognised but not 
'mrs.'
+
   
 
 INSTALLATION
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/Lingua-EN-Sentence-0.33/examples/demo.pl 
new/Lingua-EN-Sentence-0.34/examples/demo.pl
--- old/Lingua-EN-Sentence-0.33/examples/demo.pl        2022-07-04 
08:07:00.000000000 +0200
+++ new/Lingua-EN-Sentence-0.34/examples/demo.pl        2023-06-20 
03:54:05.000000000 +0200
@@ -22,17 +22,20 @@
 This string has 7 sentences.
 };
 
-my $sentences=get_sentences($text);     ## Get the sentences.
-my $num_sentences = (@$sentences);
-my $i;
-print("There are: $num_sentences sentences\n" );
-foreach my $sent (@$sentences)
+my $sentences=get_sentences($text);
+if (defined($sentences))
 {
-    $i++;
-    print("SENTENCE $i:$sent\n");
+    my $num_sentences = (@$sentences);
+    my $i;
+    print("There are: $num_sentences sentences\n" );
+    foreach my $sent (@$sentences)
+    {
+        $i++;
+        print("SENTENCE $i:$sent\n");
+    }
 }
 
- $text = q{First sentence.
+$text = q{First sentence.
 12. point 12
 Some numbers 12.46, -.123,3:.
 Some “utf quotes wrap this” “And more”};
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/Lingua-EN-Sentence-0.33/lib/Lingua/EN/Sentence.pm 
new/Lingua-EN-Sentence-0.34/lib/Lingua/EN/Sentence.pm
--- old/Lingua-EN-Sentence-0.33/lib/Lingua/EN/Sentence.pm       2022-07-05 
03:02:14.000000000 +0200
+++ new/Lingua-EN-Sentence-0.34/lib/Lingua/EN/Sentence.pm       2023-06-20 
02:47:49.000000000 +0200
@@ -1,10 +1,5 @@
 package Lingua::EN::Sentence;
 
-#==============================================================================
-#
-# Start of POD
-#
-#==============================================================================
 
 =head1 NAME
 
@@ -12,30 +7,32 @@
 
 =head1 SYNOPSIS
 
-       use Lingua::EN::Sentence qw( get_sentences add_acronyms );
-       
-       add_acronyms('lt','gen');               ## adding support for 'Lt. Gen.'
-       my $text = q{
-       A sentence usually ends with a dot, exclamation or question mark 
optionally followed by a space!
-       A string followed by 2 carriage returns denotes a sentence, even though 
it doesn't end in a dot
-       
-       Dots after single letters such as U.S.A. or in numbers like -12.34 will 
not cause a split
-       as well as common abbreviations such as Dr. I. Smith, Ms. A.B. Jones, 
Apr. Calif. Esq.
-       and (some text) ellipsis such as ... or . . are ignored.
-       Some valid cases canot be deteected, such as the answer is X. It cannot 
easily be
-       differentiated from the single letter-dot sequence to abbreviate a 
person's given name.
-       Numbered points within a sentence will not cause a split 1. Like this 
one.
-       See the code for all the rules that apply.
-       This string has 7 sentences.
-       };
-       
-       my $sentences=get_sentences($text);     # Get the sentences.
+use Lingua::EN::Sentence qw( get_sentences add_acronyms );
+
+add_acronyms('lt','gen');              ## adding support for 'Lt. Gen.'
+my $text = q{
+A sentence usually ends with a dot, exclamation or question mark optionally 
followed by a space!
+A string followed by 2 carriage returns denotes a sentence, even though it 
doesn't end in a dot
+
+Dots after single letters such as U.S.A. or in numbers like -12.34 will not 
cause a split
+as well as common abbreviations such as Dr. I. Smith, Ms. A.B. Jones, Apr. 
Calif. Esq.
+and (some text) ellipsis such as ... or . . are ignored.
+Some valid cases canot be deteected, such as the answer is X. It cannot easily 
be
+differentiated from the single letter-dot sequence to abbreviate a person's 
given name.
+Numbered points within a sentence will not cause a split 1. Like this one.
+See the code for all the rules that apply.
+This string has 7 sentences.
+};
+
+if (defined($sentences))
+{
+       my $sentences = get_sentences($text);
        foreach my $sent (@$sentences)
        {
                $i++;
                print("SENTENCE $i:$sent\n");
        }
-
+}
 
 =head1 DESCRIPTION
 
@@ -87,7 +84,8 @@
 and returns a reference to an array of sentences that the text has been split
 into. Returned sentences will be trimmed (beginning and end of sentence) of
 white space. Strings with no alpha-numeric characters in them, won't be
-returned as sentences.
+returned as sentences. If no text is supplied, a reference to an empty array
+is returned.
 
 =item add_acronyms( @acronyms )
 
@@ -189,13 +187,6 @@
 
 #==============================================================================
 #
-# End of POD
-#
-#==============================================================================
-
-
-#==============================================================================
-#
 # Pragmas
 #
 #==============================================================================
@@ -219,7 +210,7 @@
 use Carp qw/cluck/;
 use English;
 
-our $VERSION = '0.33';
+our $VERSION = '0.34';
 
 our $LOC;
 if ($OSNAME ne 'android') {
@@ -234,7 +225,7 @@
 @EXPORT_OK = qw( get_sentences add_acronyms get_acronyms set_acronyms get_EOS 
set_EOS set_locale);             
 
 our $VERBOSE = 0; # echo intermediate data transforms, useful for debugging
-our $EOS = "\001"; #"__EOS__";
+our $EOS = "\001";
 our $EOA = '__EOA__';
 
 our $P = q/[\.!?]/;                        # PUNCTUATION
@@ -280,7 +271,12 @@
 #------------------------------------------------------------------------------
 sub get_sentences {
        my ($text) = @_;
-       return [] unless defined $text;
+       
+       unless (defined($text))
+       {
+               return [];
+       }
+       
        $VERBOSE and print("ORIGINAL\n$text\n");
        
        $text = mark_up_abbreviations($text);
@@ -395,15 +391,11 @@
 # Private methods
 #
 #==============================================================================
-
-
-## Please email me any suggestions for optimizing these RegExps.
 sub remove_false_end_of_sentence {
        my ($marked_segment) = @_;
        
        
-##     ## don't do U.S.A., U.K.
-##     $marked_segment=~s/(\.\w$PAP)$EOS/$1/sg; 
+       # don't split U.S.A., U.K.
        $marked_segment=~s/([^-\w]\w$PAP\s)$EOS/$1/sg;
        $marked_segment=~s/([^-\w]\w$P)$EOS/$1/sg;         
 
@@ -434,34 +426,15 @@
        my ($text) = @_;
        
 
-       # $text=~s/(\D\d+)($P)(\s+)/$1$2$EOS$3/sg; # breaks numbered points, 
such as {EOL}1. point one
-
+       # breaks numbered points, such as {EOL}1. point one
        $text=~s/([\w $P]\d)($P)(\s+)/$1$2$EOS$3/sg;
        
        # eg 'end. (' -> 'end. $EOS ('
-       $text=~s/($PAP\s)(\s*\()/$1$EOS$2/gs; # open bracket
-       
+       $text=~s/($PAP\s)(\s*\()/$1$EOS$2/gs; # open bracket    
        $text=~s/('\w$P)(\s)/$1$EOS$2/gs;
 
-
        $text=~s/(\sno\.)(\s+)(?!\d)/$1$EOS$2/gis;
 
-       # split where single capital letter followed by dot makes sense to 
break.
-       # notice these are exceptions to the general rule NOT to split on single
-       # letter.
-       # notice also that single letter M is missing here, due to French 
'mister'
-       # which is represented as M.
-       #
-       # the rule will not split on names beginning or containing 
-       # single capital letter dot in the first or second name
-       # assuming 2 or three word name.
-       
-       # NOT WORKING , it breaks up U.S.A. after U.
-       # Valid cases if single letter thrn dot are rare, such as 'The answer 
is F'.
-       # Can't decipher meaning of this regex
-       # 
$text=~s/(\s[[:lower:]]\w+\s+[^[[:^upper:]M]\.)(?!\s+[[:upper:]]\.)/$1$EOS/sg;
-       
-
        # add EOS when you see "a.m." or "p.m." followed by a capital letter.
        $text=~s/([ap]\.m\.\s+)([[:upper:]])/$1$EOS$2/gs;
 
@@ -484,7 +457,7 @@
        return $cleaned_sentences;
 }
 
-# Replace seuence such as Mr. A. Smith Jnr. with Mr__EOA__ A__EOA__ etc
+# Replace sequence such as Mr. A. Smith Jnr. with Mr__EOA__ A__EOA__ etc
 # This simplifies the code that detects end of sentnees. The marker is
 # replaced with the original dot adter sentence slitting
 
@@ -513,11 +486,4 @@
        return $text;
 }
 
-
-#==============================================================================
-#
-# Return TRUE
-#
-#==============================================================================
-
 1;

commit perl-Lingua-EN-Sentence for openSUSE:Factory

Reply via email to