Hello community,
here is the log from the commit of package python-sas7bdat for openSUSE:Factory
checked in at 2018-12-24 11:48:02
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-sas7bdat (Old)
and /work/SRC/openSUSE:Factory/.python-sas7bdat.new.28833 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-sas7bdat"
Mon Dec 24 11:48:02 2018 rev:3 rq:660761 version:2.2.1
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-sas7bdat/python-sas7bdat.changes	2018-07-31 16:01:56.179804991 +0200
+++ /work/SRC/openSUSE:Factory/.python-sas7bdat.new.28833/python-sas7bdat.changes	2018-12-24 11:48:03.489098735 +0100
@@ -1,0 +2,17 @@
+Sat Dec 22 05:24:22 UTC 2018 - Todd R <[email protected]>
+
+- Update to 2.2.1
+ * note how to install
+ * suggest use of 'skip_header'
+ * add documentation around getting column information
+ * use more descriptive variable name for SAS7BDAT instance in README
+ * bump version to fix pypi documentation formatting
+ * revert setup.py
+ * use long_description_content_type
+ * rewrite RDCDecompressor
+ * Add the ability to use file handles, in addition to supplying a path. If
+   the file being accessed is not within the file system (for example, if it
+   is compressed into a zip or tar file) it cannot be opened from the
+   supplied path. This change adds the ability to supply a file handle, and
+   for the module to use that, rather than opening the file itself.
+ * fix dist
+ * add encoding argument to convert_file()
+ * add license; update build script
+
+-------------------------------------------------------------------
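The file-handle support added in this release can be exercised roughly as below. This is a sketch, not upstream code: `rows_from_zipped_sas7bdat` is a hypothetical helper name, and it assumes sas7bdat >= 2.2.1 is installed.

```python
import io
import zipfile


def rows_from_zipped_sas7bdat(zip_path, member):
    """Yield rows from a sas7bdat file stored inside a zip archive.

    The archive member cannot be opened from a filesystem path, so its
    bytes are wrapped in a binary file handle and passed to SAS7BDAT
    via the new ``fh`` keyword argument.
    """
    from sas7bdat import SAS7BDAT  # requires sas7bdat >= 2.2.1
    with zipfile.ZipFile(zip_path) as zf:
        # the handle must carry binary data for correct operation
        fh = io.BytesIO(zf.read(member))
        with SAS7BDAT(member, fh=fh) as reader:
            for row in reader:
                yield row
```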
Old:
----
LICENSE
sas7bdat-2.0.7.tar.gz
New:
----
sas7bdat-2.2.1.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-sas7bdat.spec ++++++
--- /var/tmp/diff_new_pack.xnhRMg/_old 2018-12-24 11:48:04.057098237 +0100
+++ /var/tmp/diff_new_pack.xnhRMg/_new 2018-12-24 11:48:04.061098233 +0100
@@ -18,21 +18,18 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-sas7bdat
-Version: 2.0.7
+Version: 2.2.1
Release: 0
-# For the license
-%define tag da1faa90d0b15c2c97a2a8eb86c91c58081bdd86
Summary: A sas7bdat file reader for Python
License: MIT
Group: Development/Languages/Python
Url: https://bitbucket.org/jaredhobbs/sas7bdat
Source:         https://files.pythonhosted.org/packages/source/s/sas7bdat/sas7bdat-%{version}.tar.gz
-Source10: https://bitbucket.org/jaredhobbs/sas7bdat/raw/%{tag}/LICENSE
BuildRequires: %{python_module devel}
-BuildRequires: %{python_module six}
+BuildRequires: %{python_module six >= 1.8.0}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
-Requires: python-six
+Requires: python-six >= 1.8.0
BuildArch: noarch
%python_subpackages
@@ -50,7 +47,6 @@
%prep
%setup -q -n sas7bdat-%{version}
-cp %{SOURCE10} .
sed -i 's/\r$//' README.md
%build
++++++ sas7bdat-2.0.7.tar.gz -> sas7bdat-2.2.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/LICENSE new/sas7bdat-2.2.1/LICENSE
--- old/sas7bdat-2.0.7/LICENSE 1970-01-01 01:00:00.000000000 +0100
+++ new/sas7bdat-2.2.1/LICENSE 2018-05-24 21:58:10.000000000 +0200
@@ -0,0 +1,19 @@
+Copyright (c) 2015-2018 Jared Hobbs
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/PKG-INFO new/sas7bdat-2.2.1/PKG-INFO
--- old/sas7bdat-2.0.7/PKG-INFO 2016-01-07 02:29:27.000000000 +0100
+++ new/sas7bdat-2.2.1/PKG-INFO 2018-11-05 06:24:29.000000000 +0100
@@ -1,12 +1,58 @@
Metadata-Version: 1.1
Name: sas7bdat
-Version: 2.0.7
+Version: 2.2.1
Summary: A sas7bdat file reader for Python
Home-page: https://bitbucket.org/jaredhobbs/sas7bdat
Author: Jared Hobbs
Author-email: [email protected]
License: MIT
-Description: UNKNOWN
+Description: sas7bdat.py
+ ===========
+
+ This module will read sas7bdat files using pure Python (2.6+, 3+). No
+ SAS software required! The module started out as a port of the R script
+ of the same name found here: https://github.com/BioStatMatt/sas7bdat but
+ has since been completely rewritten.
+
+ Also included with this library is a simple command line script,
+ ``sas7bdat_to_csv``, which converts sas7bdat files to csv files. It will
+ also print out header information and meta data using the ``--header``
+ option and it will batch convert files as well. Use the ``--help``
+ option for more information.
+
+ As is, I’ve successfully tested the script almost three hundred sample
+ files I found on the internet. For the most part, it works well. We can
+ now read compressed files!
+
+ I’m sure there are more issues that I haven’t come across yet. Please
+ let me know if you come across a data file that isn’t supported and I’ll
+ see if I can add support for the file.
+
+ Usage
+ =====
+
+ To create a sas7bdat object, simply pass the constructor a file path.
+ The object is iterable so you can read the contents like this:
+
+ ::
+
+ #!python
+ from sas7bdat import SAS7BDAT
+ with SAS7BDAT('foo.sas7bdat') as f:
+ for row in f:
+ print row
+
+ The values in each row will be a ``string``, ``float``,
+ ``datetime.date``, ``datetime.datetime``, or ``datetime.time`` instance.
+
+ If you’d like to get a pandas DataFrame, use the ``to_data_frame``
+ method:
+
+ ::
+
+ #!python
+ df = f.to_data_frame()
+
Keywords: sas,sas7bdat,csv,converter
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
@@ -16,6 +62,7 @@
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Topic :: Text Processing
Classifier: Topic :: Utilities
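The README snippet in the PKG-INFO above uses Python 2 print syntax; an equivalent Python 3 sketch, wrapped in a hypothetical helper (`print_rows` is an illustrative name, and the sas7bdat package is assumed to be installed):

```python
def print_rows(path):
    """Iterate a sas7bdat file and print each row (Python 3 syntax)."""
    from sas7bdat import SAS7BDAT  # requires the sas7bdat package
    with SAS7BDAT(path) as f:
        for row in f:
            # each value is a str, float, date, datetime, or time instance
            print(row)
```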
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/sas7bdat.py new/sas7bdat-2.2.1/sas7bdat.py
--- old/sas7bdat-2.0.7/sas7bdat.py 2016-01-07 02:27:26.000000000 +0100
+++ new/sas7bdat-2.2.1/sas7bdat.py 2018-11-05 05:56:25.000000000 +0100
@@ -13,6 +13,7 @@
import platform
import struct
import sys
+from codecs import open
from datetime import datetime, timedelta
import six
@@ -202,186 +203,96 @@
class RDCDecompressor(Decompressor):
"""
Decompresses data using the Ross Data Compression algorithm
+ http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/
+ articles/CUJ/1992/9210/ross/ross.htm
"""
- def bytes_to_bits(self, src, offset, length):
- result = [0] * (length * 8)
- for i in xrange(length):
- b = src[offset + i]
- for bit in xrange(8):
- result[8 * i + (7 - bit)] = 0 if ((b & (1 << bit)) == 0) else 1
- return result
-
- def ensure_capacity(self, src, capacity):
- if capacity >= len(src):
- new_len = max(capacity, 2 * len(src))
- src.extend([0] * (new_len - len(src)))
- return src
-
- def is_short_rle(self, first_byte_of_cb):
- return first_byte_of_cb in set([0x00, 0x01, 0x02, 0x03, 0x04, 0x05])
-
- def is_single_byte_marker(self, first_byte_of_cb):
- return first_byte_of_cb in set([0x02, 0x04, 0x06, 0x08, 0x0A])
-
- def is_two_bytes_marker(self, double_bytes_cb):
- return len(double_bytes_cb) == 2 and\
- ((double_bytes_cb[0] >> 4) & 0xF) > 2
-
- def is_three_bytes_marker(self, three_byte_marker):
- flag = three_byte_marker[0] >> 4
- return len(three_byte_marker) == 3 and (flag & 0xF) in set([1, 2])
-
- def get_length_of_rle_pattern(self, first_byte_of_cb):
- if first_byte_of_cb <= 0x05:
- return first_byte_of_cb + 3
- return 0
-
- def get_length_of_one_byte_pattern(self, first_byte_of_cb):
- return first_byte_of_cb + 14\
- if self.is_single_byte_marker(first_byte_of_cb) else 0
-
- def get_length_of_two_bytes_pattern(self, double_bytes_cb):
- return (double_bytes_cb[0] >> 4) & 0xF
-
- def get_length_of_three_bytes_pattern(self, p_type, three_byte_marker):
- if p_type == 1:
- return 19 + (three_byte_marker[0] & 0xF) +\
- (three_byte_marker[1] * 16)
- elif p_type == 2:
- return three_byte_marker[2] + 16
- return 0
-
- def get_offset_for_one_byte_pattern(self, first_byte_of_cb):
- if first_byte_of_cb == 0x08:
- return 24
- elif first_byte_of_cb == 0x0A:
- return 40
- return 0
-
- def get_offset_for_two_bytes_pattern(self, double_bytes_cb):
- return 3 + (double_bytes_cb[0] & 0xF) + (double_bytes_cb[1] * 16)
-
- def get_offset_for_three_bytes_pattern(self, triple_bytes_cb):
- return 3 + (triple_bytes_cb[0] & 0xF) + (triple_bytes_cb[1] * 16)
-
- def clone_byte(self, b, length):
- return [b] * length
-
def decompress_row(self, offset, length, result_length, page):
- b = self.to_ord
- c = self.to_chr
- src_row = [b(x) for x in page[offset:offset + length]]
+ src_row = [self.to_ord(x) for x in page[offset:offset + length]]
out_row = [0] * result_length
+ ctrl_mask = 0
+ ctrl_bits = 0
src_offset = 0
out_offset = 0
+
+ # process each item in src_row
while src_offset < (len(src_row) - 2):
- prefix_bits = self.bytes_to_bits(src_row, src_offset, 2)
- src_offset += 2
- for bit_index in xrange(16):
- if src_offset >= len(src_row):
- break
- if prefix_bits[bit_index] == 0:
- out_row = self.ensure_capacity(out_row, out_offset)
- out_row[out_offset] = src_row[src_offset]
- src_offset += 1
- out_offset += 1
- continue
- marker_byte = src_row[src_offset]
- try:
- next_byte = src_row[src_offset + 1]
- except IndexError:
- break
- if self.is_short_rle(marker_byte):
- length = self.get_length_of_rle_pattern(marker_byte)
- out_row = self.ensure_capacity(
- out_row, out_offset + length
- )
- pattern = self.clone_byte(next_byte, length)
- out_row[out_offset:out_offset + length] = pattern
- out_offset += length
- src_offset += 2
- continue
- elif self.is_single_byte_marker(marker_byte) and not\
- ((next_byte & 0xF0) == ((next_byte << 4) & 0xF0)):
- length = self.get_length_of_one_byte_pattern(marker_byte)
- out_row = self.ensure_capacity(
- out_row, out_offset + length
- )
- back_offset = self.get_offset_for_one_byte_pattern(
- marker_byte
- )
- start = out_offset - back_offset
- end = start + length
- out_row[out_offset:out_offset + length] =\
- out_row[start:end]
- src_offset += 1
- out_offset += length
- continue
- two_bytes_marker = src_row[src_offset:src_offset + 2]
- if self.is_two_bytes_marker(two_bytes_marker):
- length = self.get_length_of_two_bytes_pattern(
- two_bytes_marker
- )
- out_row = self.ensure_capacity(
- out_row, out_offset + length
- )
- back_offset = self.get_offset_for_two_bytes_pattern(
- two_bytes_marker
- )
- start = out_offset - back_offset
- end = start + length
- out_row[out_offset:out_offset + length] =\
- out_row[start:end]
- src_offset += 2
- out_offset += length
- continue
- three_bytes_marker = src_row[src_offset:src_offset + 3]
- if self.is_three_bytes_marker(three_bytes_marker):
- p_type = (three_bytes_marker[0] >> 4) & 0x0F
- back_offset = 0
- if p_type == 2:
- back_offset = self.get_offset_for_three_bytes_pattern(
- three_bytes_marker
- )
- length = self.get_length_of_three_bytes_pattern(
- p_type, three_bytes_marker
- )
- out_row = self.ensure_capacity(
- out_row, out_offset + length
- )
- if p_type == 1:
- pattern = self.clone_byte(
- three_bytes_marker[2], length
- )
- else:
- start = out_offset - back_offset
- end = start + length
- pattern = out_row[start:end]
- out_row[out_offset:out_offset + length] = pattern
- src_offset += 3
- out_offset += length
- continue
- else:
- self.parent.logger.error(
- 'unknown marker %s at offset %s', src_row[src_offset],
- src_offset
- )
- break
- return b''.join([c(x) for x in out_row])
+ # get new load of control bits if needed
+ ctrl_mask = ctrl_mask >> 1
+ if ctrl_mask == 0:
+ ctrl_bits = (src_row[src_offset] << 8) +\
+ src_row[src_offset + 1]
+ src_offset += 2
+ ctrl_mask = 0x8000
+
+ # just copy this char if control bit is zero
+ if (ctrl_bits & ctrl_mask) == 0:
+ out_row[out_offset] = src_row[src_offset]
+ out_offset += 1
+ src_offset += 1
+ continue
+
+ # undo the compression code
+ cmd = (src_row[src_offset] >> 4) & 0x0F
+ cnt = src_row[src_offset] & 0x0F
+ src_offset += 1
+
+ if cmd == 0: # short rle
+ cnt += 3
+ for k in xrange(cnt):
+ out_row[out_offset + k] = src_row[src_offset]
+ out_offset += cnt
+ src_offset += 1
+ elif cmd == 1: # long rle
+ cnt += src_row[src_offset] << 4
+ cnt += 19
+ src_offset += 1
+ for k in xrange(cnt):
+ out_row[out_offset + k] = src_row[src_offset]
+ out_offset += cnt
+ src_offset += 1
+ elif cmd == 2: # long pattern
+ ofs = cnt + 3
+ ofs += src_row[src_offset] << 4
+ src_offset += 1
+ cnt = src_row[src_offset]
+ src_offset += 1
+ cnt += 16
+ for k in xrange(cnt):
+ out_row[out_offset + k] = out_row[out_offset - ofs + k]
+ out_offset += cnt
+ elif cmd >= 3 and cmd <= 15: # short pattern
+ ofs = cnt + 3
+ ofs += src_row[src_offset] << 4
+ src_offset += 1
+ for k in xrange(cmd):
+ out_row[out_offset + k] = out_row[out_offset - ofs + k]
+ out_offset += cmd
+ else:
+ self.parent.logger.error(
+ 'unknown marker %s at offset %s', src_row[src_offset],
+ src_offset
+ )
+ break
+ return b''.join([self.to_chr(x) for x in out_row])
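The rewritten control-bit loop in the diff above is easier to follow as a standalone sketch. This is not the upstream method: `rdc_decompress` is a hypothetical Python 3 function mirroring the new `decompress_row` logic (16-bit control word read MSB-first; a clear bit copies a literal, a set bit dispatches on the command byte's high nibble), with error handling reduced to falling out of the loop.

```python
def rdc_decompress(src, result_length):
    """Decompress one row of Ross Data Compression (RDC) data.

    src is a bytes object; result_length is the expected output size.
    """
    out = [0] * result_length
    ctrl_bits = ctrl_mask = 0
    i = o = 0
    while i < len(src) - 2:
        # reload 16 control bits when the mask is exhausted
        ctrl_mask >>= 1
        if ctrl_mask == 0:
            ctrl_bits = (src[i] << 8) + src[i + 1]
            i += 2
            ctrl_mask = 0x8000
        # a clear control bit means the next byte is a literal
        if (ctrl_bits & ctrl_mask) == 0:
            out[o] = src[i]
            o += 1
            i += 1
            continue
        # a set bit means a command byte: high nibble = command, low = count
        cmd = (src[i] >> 4) & 0x0F
        cnt = src[i] & 0x0F
        i += 1
        if cmd == 0:  # short RLE: 3..18 copies of the next byte
            cnt += 3
            out[o:o + cnt] = [src[i]] * cnt
            o += cnt
            i += 1
        elif cmd == 1:  # long RLE: count extended by the next byte, plus 19
            cnt += (src[i] << 4) + 19
            i += 1
            out[o:o + cnt] = [src[i]] * cnt
            o += cnt
            i += 1
        elif cmd == 2:  # long back-reference pattern
            ofs = cnt + 3 + (src[i] << 4)
            i += 1
            cnt = src[i] + 16
            i += 1
            for k in range(cnt):  # may overlap itself, so copy byte by byte
                out[o + k] = out[o - ofs + k]
            o += cnt
        else:  # cmd 3..15: short back-reference pattern, cmd bytes long
            ofs = cnt + 3 + (src[i] << 4)
            i += 1
            for k in range(cmd):
                out[o + k] = out[o - ofs + k]
            o += cmd
    return bytes(out)
```

For example, a control word of 0x2000 marks the third item as a command, so two literals `AB` followed by a short RLE of `C` expand to `ABCCC`.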
class SAS7BDAT(object):
"""
SAS7BDAT(path[, log_level[, extra_time_format_strings[, \
-extra_date_time_format_strings[, extra_date_format_strings]]]]) -> \
+extra_date_time_format_strings[, extra_date_format_strings[, \
+fh=fh]]]]]) -> \
SAS7BDAT object
- Open a SAS7BDAT file. The log level are standard logging levels
- (defaults to logging.INFO).
+ Open a SAS7BDAT file or use an existing file handle.
+ The log level are standard logging levels (defaults to logging.INFO).
If your sas7bdat file uses non-standard format strings for time, datetime,
or date values, pass those strings into the constructor using the
appropriate kwarg.
+
+ The file will be opened from the path supplied, unless a file handle
+ is supplied. The file handle should be opened in binary mode for
+ correct operation.
"""
_open_files = []
RLE_COMPRESSION = b'SASYZCRL'
@@ -410,7 +321,8 @@
skip_header=False,
encoding='utf8',
encoding_errors='ignore',
- align_correction=True):
+ align_correction=True,
+ fh=None):
"""
x.__init__(...) initializes x; see help(type(x)) for signature
"""
@@ -433,7 +345,7 @@
self.encoding = encoding
self.encoding_errors = encoding_errors
self.align_correction = align_correction
- self._file = open(self.path, 'rb')
+ self._file = fh or open(self.path, 'rb')
self._open_files.append(self._file)
self.cached_page = None
self.current_page_type = None
@@ -745,7 +657,8 @@
).decode(self.encoding, self.encoding_errors))
return row_elements
- def convert_file(self, out_file, delimiter=',', step_size=100000):
+ def convert_file(self, out_file, delimiter=',', step_size=100000,
+ encoding=None):
"""
convert_file(out_file[, delimiter[, step_size]]) -> None
@@ -761,7 +674,7 @@
if out_file == '-':
out_f = sys.stdout
else:
- out_f = open(out_file, 'w')
+ out_f = open(out_file, 'w', encoding=encoding)
out = csv.writer(out_f, lineterminator='\n', delimiter=delimiter)
i = 0
for i, line in enumerate(self, 1):
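The new `encoding` argument to `convert_file()` shown in the hunk above can be used roughly as follows. This is a sketch: `sas_to_csv` is a hypothetical wrapper name, and it assumes sas7bdat >= 2.2.1 is installed.

```python
def sas_to_csv(sas_path, csv_path, encoding="utf8"):
    """Convert a sas7bdat file to CSV in a chosen output encoding."""
    from sas7bdat import SAS7BDAT  # requires sas7bdat >= 2.2.1
    with SAS7BDAT(sas_path) as reader:
        # encoding is passed through to codecs.open() for the output file
        reader.convert_file(csv_path, delimiter=",", encoding=encoding)
```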
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/setup.py new/sas7bdat-2.2.1/setup.py
--- old/sas7bdat-2.0.7/setup.py 2016-01-07 02:27:50.000000000 +0100
+++ new/sas7bdat-2.2.1/setup.py 2018-11-05 06:24:05.000000000 +0100
@@ -4,6 +4,16 @@
import sys
from distutils.core import setup
+try:
+ from pypandoc import convert_file
+except ImportError:
+ print('warning: pypandoc not found, could not convert Markdown to RST.')
+
+ def convert_file(filename, to):
+ with open(filename, 'r') as f:
+ data = f.read()
+ return data
+
if sys.version_info < (2, 6):
print("Sorry, this module only works on 2.6+, 3+")
@@ -11,12 +21,13 @@
setup(name='sas7bdat',
- version='2.0.7',
+ version='2.2.1',
author='Jared Hobbs',
author_email='[email protected]',
license='MIT',
url='https://bitbucket.org/jaredhobbs/sas7bdat',
description='A sas7bdat file reader for Python',
+ long_description=convert_file('README.md', 'rst'),
py_modules=['sas7bdat'],
scripts=['scripts/sas7bdat_to_csv'],
install_requires=['six>=1.8.0'],
@@ -28,6 +39,7 @@
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2.6',
+ 'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Topic :: Text Processing',
'Topic :: Utilities',