commit python-gnssanalysis for openSUSE:Factory

Source-Sync Tue, 09 Jun 2026 05:30:28 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-gnssanalysis for 
openSUSE:Factory checked in at 2026-06-09 14:27:48
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-gnssanalysis (Old)
 and      /work/SRC/openSUSE:Factory/.python-gnssanalysis.new.2375 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-gnssanalysis"

Tue Jun  9 14:27:48 2026 rev:4 rq:1358119 version:0.0.60

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-gnssanalysis/python-gnssanalysis.changes  
2026-04-28 12:01:33.002422212 +0200
+++ 
/work/SRC/openSUSE:Factory/.python-gnssanalysis.new.2375/python-gnssanalysis.changes
        2026-06-09 14:30:11.857133034 +0200
@@ -1,0 +2,14 @@
+Tue Jun  9 06:09:26 UTC 2026 - Dirk Stoecker <[email protected]>
+
+- update to 0.0.60
+  * Introduced a framework for hashing and testing DataFrames
+    against a baseline
+  * Introduced ability to enter EarthData credentials (for CDDIS
+    downloads) via environment variables rather than the .netrc file
+  * Fixed issue with downloading IGS FIN SP3 files from CDDIS -
+    changed default to 15M for sampling rate rather than 05M
+  * NPI-4453 Framework for DataFrame hashing & test baselining
+  * NPI-4495 Earthdata creds via env var
+  * Remove IGS from searching for 05M SP3 files
+
+-------------------------------------------------------------------

Old:
----
  gnssanalysis-0.0.59.tar.gz

New:
----
  gnssanalysis-0.0.60.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-gnssanalysis.spec ++++++
--- /var/tmp/diff_new_pack.V5Z166/_old  2026-06-09 14:30:13.497201089 +0200
+++ /var/tmp/diff_new_pack.V5Z166/_new  2026-06-09 14:30:13.505201421 +0200
@@ -21,7 +21,7 @@
 %{?sle15_python_module_pythons}
 %define pyname gnssanalysis
 Name:           python-%{pyname}
-Version:        0.0.59
+Version:        0.0.60
 Release:        0
 Summary:        GNSS-related functionality from Geoscience Australia
 License:        BSD-3-Clause
@@ -48,6 +48,7 @@
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
 BuildArch:      noarch
+Requires:       alts
 Requires:       python-boto3
 Requires:       python-click
 Requires:       python-hatanaka
@@ -64,7 +65,6 @@
 Requires:       python-tqdm
 Requires:       python-typing_extensions
 Requires:       python-unlzw3
-Requires:       alts
 %python_subpackages
 
 %description

++++++ gnssanalysis-0.0.59.tar.gz -> gnssanalysis-0.0.60.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/gnssanalysis-0.0.59/gnssanalysis/_version.py 
new/gnssanalysis-0.0.60/gnssanalysis/_version.py
--- old/gnssanalysis-0.0.59/gnssanalysis/_version.py    2026-01-30 
06:09:38.000000000 +0100
+++ new/gnssanalysis-0.0.60/gnssanalysis/_version.py    2026-06-09 
05:42:05.000000000 +0200
@@ -25,9 +25,9 @@
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 0.0.59)"
-    git_full = "4c0deba3bdf05c5c2dcae79123064b51089a66a3"
-    git_date = "2026-01-30 16:09:38 +1100"
+    git_refnames = " (HEAD -> main, tag: 0.0.60)"
+    git_full = "d0c36425212b0b8f9628d93dae3dd96da975eeda"
+    git_date = "2026-06-09 13:42:05 +1000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/gnssanalysis-0.0.59/gnssanalysis/gn_download.py 
new/gnssanalysis-0.0.60/gnssanalysis/gn_download.py
--- old/gnssanalysis-0.0.59/gnssanalysis/gn_download.py 2026-01-30 
06:09:38.000000000 +0100
+++ new/gnssanalysis-0.0.60/gnssanalysis/gn_download.py 2026-06-09 
05:42:05.000000000 +0200
@@ -96,7 +96,7 @@
 
 def get_earthdata_credentials(username: Optional[str] = None, password: 
Optional[str] = None) -> Tuple[str, str]:
     """
-    Get NASA Earthdata credentials from .netrc file or direct parameters.
+    Get NASA Earthdata credentials from direct parameters, env vars, or .netrc 
file.
     :param Optional[str] username: Directly provided username (highest 
priority)
     :param Optional[str] password: Directly provided password (highest 
priority)
     :return Tuple[str, str]: Username and password tuple
@@ -106,9 +106,25 @@
     if username and password:
         logging.debug("Using directly provided NASA Earthdata credentials")
         return username, password
-    # Priority 2: Try to read from .netrc file
+
+    # Priority 2: Try to read from env vars
+    logging.debug("Attempting to pick up NASA Earthdata credentials from env 
vars...")
+    if all(env in _os.environ for env in ["EARTHDATA_USERNAME", 
"EARTHDATA_PASSWORD"]):
+
+        env_user = _os.environ["EARTHDATA_USERNAME"]
+        env_pass = _os.environ["EARTHDATA_PASSWORD"]
+
+        if len(env_user) == 0 or len(env_pass) == 0:
+            raise ValueError("NASA Earthdata username or password found in env 
var appears to be empty")
+
+        logging.debug("NASA Earthdata credentials successfully read from env 
vars")
+        return _os.environ["EARTHDATA_USERNAME"], 
_os.environ["EARTHDATA_PASSWORD"]
+    else:
+        logging.debug("Env vars EARTHDATA_USERNAME or EARTHDATA_PASSWORD were 
not set. Trying netrc...")
+
+    # Priority 3: Try to read from .netrc file
     try:
-        netrc_path = _Path.home() / '.netrc'
+        netrc_path = _Path.home() / ".netrc"
         if netrc_path.exists():
             logging.debug(f"Found .netrc at {netrc_path}")
             netrc_auth = _netrc.netrc()
@@ -121,8 +137,11 @@
     except Exception as e:
         logging.debug(f"Error reading .netrc: {e}")
     # No credentials available
-    raise ValueError("No NASA Earthdata credentials available. Provide 
username/password directly "
-                     f"or set up .netrc file with entry for 
'{EARTHDATA_URL}'.")
+    raise ValueError(
+        "No NASA Earthdata credentials available. Provide username/password 
directly, "
+        "set env vars EARTHDATA_USERNAME and EARTHDATA_PASSWORD, "
+        f"or set up .netrc file with entry for '{EARTHDATA_URL}'."
+    )
 
 
 def upload_with_chunksize_and_meta(
@@ -332,7 +351,7 @@
         "BIA": "01D",
         "SP3": {
             ("COD", "GFZ", "GRG", "IAC", "JAX", "MIT", "WUM"): "05M",
-            ("ESA", "IGS"): {"FIN": "05M", "RAP": "15M", None: "15M"},
+            ("ESA"): {"FIN": "05M", "RAP": "15M", None: "15M"},
             (): "15M",
         },
         "CLK": {
@@ -839,7 +858,7 @@
                 response.raise_for_status()
 
                 # Download the file
-                with open(download_filepath, 'wb') as f:
+                with open(download_filepath, "wb") as f:
                     for chunk in response.iter_content(chunk_size=MB):
                         if chunk:
                             f.write(chunk)
@@ -860,7 +879,7 @@
                 if download_filepath.is_file():
                     download_filepath.unlink()
                 raise
-            backoff = _random.uniform(0.0, 2.0 ** retries)
+            backoff = _random.uniform(0.0, 2.0**retries)
             _warnings.warn(
                 f"Error downloading {filename}: {e} " f"(retry 
{retries}/{max_retries}, backoff {backoff:.1f}s)"
             )
@@ -902,9 +921,7 @@
 
     # Get credentials once for all downloads
     try:
-        earthdata_username, earthdata_password = get_earthdata_credentials(
-            username=username, password=password
-        )
+        earthdata_username, earthdata_password = 
get_earthdata_credentials(username=username, password=password)
     except ValueError as e:
         logging.error(f"Failed to obtain NASA Earthdata credentials: {e}")
         raise
@@ -916,7 +933,7 @@
             url_folder=url_folder,
             output_folder=output_folder,
             username=earthdata_username,
-            password=earthdata_password
+            password=earthdata_password,
         )
 
     with _concurrent.futures.ThreadPoolExecutor() as executor:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/gnssanalysis-0.0.59/gnssanalysis/gn_utils.py 
new/gnssanalysis-0.0.60/gnssanalysis/gn_utils.py
--- old/gnssanalysis-0.0.59/gnssanalysis/gn_utils.py    2026-01-30 
06:09:38.000000000 +0100
+++ new/gnssanalysis-0.0.60/gnssanalysis/gn_utils.py    2026-06-09 
05:42:05.000000000 +0200
@@ -1,15 +1,24 @@
+import hashlib
+import inspect
 import logging as _logging
 import os as _os
+import pickle
 import sys as _sys
 import pathlib as _pathlib
 from time import perf_counter
+import warnings
 
 import click as _click
 
-from typing import Union
+from pandas import DataFrame
+from typing import Literal, Optional, Union
 
 from gnssanalysis.enum_meta_properties import EnumMetaProperties
 
+# Two options, as a convenience feature to allow invoking from the project 
root or the tests subdir.
+UNITTEST_BASELINE_FILES_ROOT_RELATIVE = 
_pathlib.Path("./tests/unittest_baselines")
+UNITTEST_BASELINE_FILES_TESTS_RELATIVE = _pathlib.Path("./unittest_baselines")
+
 
 class StrictMode(metaclass=EnumMetaProperties):
     name: str
@@ -986,3 +995,413 @@
         )
         if self.print_time:
             print(self.readout)
+
+
+def sha256(bytes_to_hash: bytes) -> str:
+    """
+    Convenience wrapper to quickly call hashlib.sha256 and return a hex digest 
string
+    """
+    return hashlib.sha256(bytes_to_hash).hexdigest()
+
+
+class UnitTestBaseliner:
+
+    mode: Literal["baseline", "verify"] = "verify"
+
+    # Unpickling is off by default for security reasons (arbitrary code 
injection via serialised objects)
+    # Enable temporarily when needed to debug a test regression / change, and 
ensure input data is trusted.
+    enable_unpickling: bool = False  # DO NOT commit changes to this
+
+    # Record of (test) functions which have called either baseline or verify 
functions.
+    # If the same function calls twice, this indicates multiple data sets are 
being stored / checked, under a single
+    # name. This will cause the last to overwrite all previous, and we will 
only test that last one.
+    caller_record: set[str] = set()
+
+    @staticmethod
+    def get_paths_for_pickle_and_hash(
+        filename_prefix: str,
+        subdir: Optional[_pathlib.Path] = None,
+    ) -> tuple[_pathlib.Path, _pathlib.Path]:
+
+        cwd: str = _pathlib.Path.cwd().as_posix()
+        # The following is a quality of life feature, allowing test invocation 
from either:
+        #  - the project root dir --> python -m unittest discover -v -s tests
+        #  - the tests subdir     --> python -m unittest discover -v
+        if cwd.endswith("/gnssanalysis"):
+            parent_dir = UNITTEST_BASELINE_FILES_ROOT_RELATIVE
+        elif cwd.endswith("/gnssanalysis/tests"):
+            parent_dir = UNITTEST_BASELINE_FILES_TESTS_RELATIVE
+        else:
+            raise ValueError(
+                f"UnitTestBaseliner invoked in invalid workdir: '{cwd}'. "
+                "It should be run within the top level gnssanalysis project 
dir (preferred), or the tests subdir"
+            )
+
+        if not parent_dir.is_dir():
+            raise ValueError(f"Test baselining dir not found at: 
'{parent_dir.as_posix()}'")
+
+        target_dir = parent_dir / subdir if subdir is not None else parent_dir
+        if not target_dir.is_dir():
+            # Create directory (fail if parent dirs don't exist). We take this 
more conservative approach because if
+            # the baseline directory doesn't exist *where we are looking*, 
that may indicate our workdir is wrong
+            # and we should stop.
+            target_dir.mkdir()
+
+        pickled_list_path = 
_pathlib.Path(f"{target_dir}/{filename_prefix}.pickledlist")
+        pickled_list_hash_path = 
_pathlib.Path(f"{target_dir}/{filename_prefix}.pickledlist_sha256")
+        return (pickled_list_path, pickled_list_hash_path)
+
+    @staticmethod
+    def get_grandparent_caller_id() -> tuple[str, str]:
+        # This function uses Python frame inspection to determine the *2nd 
level* caller's name. I.e. finds
+        # the grandparent class and function on the stack.
+
+        # --- AI declaration ---: This function leverages suggestions from 
Google Gemini.
+
+        # For example, if this is *called by* a function which was itself 
called by TestClk.test_diff_clk(), the
+        # return would be: (TestClk, test_diff_clk)
+
+        # Note, because navigation is simply a question of how far to walk the 
stack, it is important to be mindful
+        # of where you call this from!
+        # I.e. don't call it from within a function which in turn is called by
+        # something, the *caller* of which you want to know about... that 
would be frame -3, not frame -2.
+
+        # The following depicts the typical frame structure of intended usage:
+        # TestClk.test_diff_clk() -> UnitTestBaseliner.verify() -> 
get_grandparent_caller_id()
+        #         ^Frame -2                            ^Frame -1   ^ current 
frame
+        # We want the name of frame -2, our 'grandparent'.
+
+        # Set up try block to ensure we delete the frame ref created by 
calling this function
+        try:
+            caller_frame = None
+            # The calling function's calling function frame. I.e the frame of 
the grandparent function.
+            # We have to step back two, because the first frame is us, the 
next is the function leveraging us,
+            # and the one after that is whatever called *that* function.
+
+            # Leveraging a lot of linter ignores here, as almost everything in 
these chains can return None, making
+            # it easier and much simpler, to just catch the exceptions.
+            callers_callers_frame = inspect.currentframe().f_back.f_back  # 
type: ignore
+            func_name = callers_callers_frame.f_code.co_name  # type: ignore
+            if "self" in callers_callers_frame.f_locals:  # type: ignore
+                calling_class_name = 
callers_callers_frame.f_locals["self"].__class__.__name__  # type: ignore
+            elif "cls" in callers_callers_frame.f_locals:  # type: ignore
+                calling_class_name = 
callers_callers_frame.f_locals["cls"].__class__.__name__  # type: ignore
+            else:
+                raise AttributeError("Class not found via either self or cls")
+
+            # If nothing has raised an AttributeError yet, we have a class and 
function name.
+            # Check it's not accidentally us:
+            if calling_class_name == __class__.__name__:
+                raise ValueError(
+                    f"Calling error: somehow, the grandparent of 
get_caller_pretty_string() was "
+                    f"us {__class__.__name__}. That shouldn't happen. Got: 
{calling_class_name}"
+                )
+            # TODO can we check if it's a test, or lives in a 'tests' package?
+            # return f"{calling_class_name}.{func_name}"
+            return (calling_class_name, func_name)
+
+        except AttributeError as a_ex:
+            raise ValueError(
+                f"Failed to find name of caller. Please set filename_prefix 
and subdir explicity. Exception: {a_ex}"
+            )
+
+        finally:
+            del caller_frame  # Avoid creating ref cycle and leaking memory. 
I.e. help the garbage collector.
+            # See doc here: 
https://docs.python.org/3/library/inspect.html#inspect.Traceback.positions
+
+    @staticmethod
+    def ensure_unique_objects(objects: list[object]) -> None:
+
+        _logging.debug("Verifying no duplicate object references in object 
list to hash")
+
+        unique_addresses: set[int] = set([id(obj) for obj in objects])
+
+        addr_count = len(unique_addresses)
+        obj_count = len(objects)
+        if addr_count != obj_count:
+            raise ValueError(
+                f"Count of unique addresses ({addr_count}) didn't match length 
of object list ({obj_count}). "
+                "Two references to the same DF / other object may have been 
passed, please investigate!"
+            )
+
+    @staticmethod
+    def create_baseline(  # Was baseline_pickled_df_list_and_hash()
+        current_object_list: list[object],
+        # These are used to describe the calling class and function, and are 
inferred automatically. If needed they
+        # can be explicitly set here:
+        subdir: Optional[_pathlib.Path] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> None:
+
+        if UnitTestBaseliner.mode != "baseline":
+            raise ValueError(
+                "Refusing to create baseline of pickled DFs / objects and 
hash, while not in 'baseline' mode. "
+                "Set UnitTestBaseliner.mode = 'baseline' first"
+            )
+
+        if filename_prefix is None:
+            # Try to determine filename prefix from class name and function 
which is calling us...
+            caller_class, caller_func = 
UnitTestBaseliner.get_grandparent_caller_id()
+            _logging.debug(
+                f"No filename_prefix provided. "
+                f"Using grandparent class and func (found using frame 
inspection): {caller_class}, {caller_func}"
+            )
+            filename_prefix = caller_func
+            subdir = _pathlib.Path(caller_class)
+
+            caller_id = f"{caller_class}.{caller_func}"
+        else:
+            caller_id = filename_prefix
+
+        # Check if we've been called before by this class,function pair (i.e. 
caller_id).
+        # If this is not our first call, continuing will overwrite previous 
results. So we raise.
+        if caller_id in UnitTestBaseliner.caller_record:
+            raise ValueError(
+                f"Multiple calls from '{caller_id}'! Please consolidate your 
dataframes / objects to verify, and "
+                "only pass one list per test function / filename_prefix."
+            )
+        UnitTestBaseliner.caller_record.add(caller_id)
+
+        pickled_objects_path, aggregate_sha256_path = 
UnitTestBaseliner.get_paths_for_pickle_and_hash(
+            filename_prefix, subdir=subdir
+        )
+
+        # Safety check that we did not get two references to the same 
DataFrame / object in the list
+        UnitTestBaseliner.ensure_unique_objects(current_object_list)
+
+        # Structure here is:
+        # pickled_list: bytes -> created from an array of DataFrames / 
objects. Pickled into a single bytes object.
+        # pickled_list_sha256: str -> sha256 hash of the above pickled 
DataFrame / object list.
+
+        current_df_list: list[DataFrame] = [df for df in current_object_list 
if isinstance(df, DataFrame)]
+        if len(current_object_list) > len(current_df_list):
+            warnings.warn(
+                "Creating a unittest baseline containing objects other than 
DataFrames! This can be hash "
+                "verified, but verify() will crash if any changes are 
detected. Please implement support for "
+                "other required object types!"
+            )
+        # TODO other object support to be added here
+
+        pickled_list: bytes = pickle.dumps(current_object_list)
+        pickled_list_sha256: str = hashlib.sha256(pickled_list).hexdigest()
+
+        warnings.warn(
+            "Baselining should only be done supervised (in a dev environment). 
"
+            "If you see this message in a pipeline run, something needs 
fixing!"
+        )
+        _logging.debug(f"About to write baseline: 
'{pickled_objects_path.as_posix()}': {pickled_list_sha256}...")
+
+        with open(aggregate_sha256_path, "wb") as hash_file:
+            hash_file.write(pickled_list_sha256.encode())
+        with open(pickled_objects_path, "wb") as pickled_objects_file:
+            pickled_objects_file.write(pickled_list)
+
+        _logging.info(
+            "TEST BASELINED -->> **Please ensure you commit both pickle and 
hash files with your changes**: "
+            f"'{pickled_objects_path.as_posix()}': {pickled_list_sha256}.\n"
+        )
+
+    @staticmethod
+    def verify(  # Was create_and_verify_pickled_df_list()
+        current_object_list: list[object],
+        # parent_dir: _pathlib.Path = 
BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
+        # Option to strictly enforce that a baseline must exist for anything 
this function is invoked to check:
+        raise_for_missing_baseline: bool = False,
+        raise_rather_than_continue_for_incorrect_mode: bool = False,
+        # The expected pickled list hash will be read from disk, at a path 
constructed using the name of the
+        # calling class and function. While it should not be necessary, you 
can optionally override the expected hash:
+        expected_pickled_list_sha256: Optional[str] = None,
+        # These are used to describe the calling class and function, and are 
inferred automatically. If needed they
+        # can be explicitly set here:
+        subdir: Optional[_pathlib.Path] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> bool:
+        # Return options:
+        # - True if verification successful.
+        # - False if baseline incomplete or missing (unable to verify), OR if 
mode is not 'verify'
+        # NOTE: Raises for verification failed.
+
+        if UnitTestBaseliner.mode != "verify":
+
+            # TODO could change this to just politely state that it is 
skipping as in baseline mode. But we don't
+            # want to leave things in baseline mode, so...? Is failing tests 
sufficient? Hopefully.
+            if raise_rather_than_continue_for_incorrect_mode:
+                raise ValueError(
+                    "Refusing to run verify method while not in verify mode. "
+                    "Set UnitTestBaseliner.mode = 'verify' first"
+                )
+            warnings.warn(
+                "Refusing to run verify method while not in verify mode. " 
"Set UnitTestBaseliner.mode = 'verify' first"
+            )
+            return False
+
+        # Verify we didn't get passed multiple, overwritten copies of the same 
reference
+        UnitTestBaseliner.ensure_unique_objects(current_object_list)
+
+        if filename_prefix is None:
+            # Try to determine filename prefix from class name and function 
which is calling us...
+            caller_class, caller_func = 
UnitTestBaseliner.get_grandparent_caller_id()
+            _logging.debug(
+                f"No filename_prefix provided. "
+                f"Using grandparent class and func (found using frame 
inspection): {caller_class}, {caller_func}"
+            )
+            filename_prefix = caller_func
+            subdir = _pathlib.Path(caller_class)
+
+            caller_id = f"{caller_class}.{caller_func}"
+        else:
+            caller_id = filename_prefix
+
+        # Check if we've been called before by this class,function pair (i.e. 
caller_id).
+        if caller_id in UnitTestBaseliner.caller_record:
+            raise ValueError(
+                f"Multiple calls from '{caller_id}'! Please consolidate your 
dataframes / objects to validate, and "
+                "only pass one list per test function / filename_prefix."
+            )
+        UnitTestBaseliner.caller_record.add(caller_id)
+
+        # Determine paths on disk...
+        pickled_list_path, pickled_list_hash_path = 
UnitTestBaseliner.get_paths_for_pickle_and_hash(
+            filename_prefix, subdir=subdir
+        )
+
+        # Check if pickled list or hash exist on disk
+        pickle_exists = pickled_list_path.exists()
+        hash_exists = pickled_list_hash_path.exists()
+
+        if hash_exists == False:
+            if raise_for_missing_baseline:
+                raise ValueError(
+                    f"Cannot verify DFs / objects against baseline (hash file: 
{'present' if hash_exists else 'missing'}, "
+                    f"pickled list file: {'present' if pickle_exists else 
'missing'}) "
+                    f"for '{caller_id}'."
+                )
+            warnings.warn(
+                f"Cannot verify DFs / objects against baseline (hash file: 
{'present' if hash_exists else 'missing'}, "
+                f"pickled list file: {'present' if pickle_exists else 
'missing'}) "
+                f"for '{caller_id}'."
+            )
+            return False
+
+        if expected_pickled_list_sha256 is None:  # Expected hash not 
provided, load it from disk
+            # Load old aggregate hash (of pickled list)...
+            _logging.debug(f"No expected hash value provided for 
'{pickled_list_path}', attempting to load...")
+            with open(pickled_list_hash_path, "rb") as pickled_list_hash_file:
+                expected_pickled_list_sha256 = 
pickled_list_hash_file.read().decode()
+
+        # Data ready, now do comparison
+        # Generate pickled list and aggregate hash
+        pickled_list = pickle.dumps(current_object_list)
+        pickled_list_sha256 = sha256(pickled_list)
+
+        if pickled_list_sha256 != expected_pickled_list_sha256:
+            _logging.debug(
+                f"Hashes did not match for '{pickled_list_path}'. Expected: 
{expected_pickled_list_sha256} Actual: {pickled_list_sha256}"
+            )
+            # Load old DataFrames / other objects (pickled list)...
+            with open(pickled_list_path, "rb") as pickled_list_hash_file:
+                pickled_list = pickled_list_hash_file.read()
+
+            # Unpickle if the safety is turned off
+            # CAUTION: deserialising can present arbitrary code execution 
potential. Ensure the data passed in is trustworthy.
+            if UnitTestBaseliner.enable_unpickling != True:
+                raise ValueError(
+                    "Cannot load baselined DataFrames / objects from pickle 
for analysis as unpickling is "
+                    "off (default for security). Temporarily set 
UnitTestBaseliner.enable_unpickling = True to "
+                    "allow deserialisation of old DFs / objects from disk."
+                )
+            warnings.warn(
+                "Unpickling object list from unittest baseline, to create diff 
with current results. This may "
+                "present a security risk, and should NOT be left enabled when 
not needed. Please ensure "
+                "UnitTestBaseliner.enable_unpickling defaults to False"
+            )
+            unpickled_object_list: list[object] = pickle.loads(pickled_list)
+
+            # Filter OLD (baseline) object list by datatype
+            old_df_list: list[DataFrame] = [df for df in unpickled_object_list 
if isinstance(df, DataFrame)]
+            if len(unpickled_object_list) > len(old_df_list):
+                raise NotImplementedError(
+                    "Outputting diffs for non-DataFrame objects during 
verification, is not yet supported"
+                )
+            # TODO filtering to extract other supported datatypes will go here 
in future, rather than the above exception
+
+            # Filter NEW (being verified) object list by datatype
+            current_df_list: list[DataFrame] = [df for df in 
current_object_list if isinstance(df, DataFrame)]
+            if len(current_object_list) > len(current_df_list):
+                raise NotImplementedError(
+                    "Outputting diffs for non-DataFrame objects during 
verification, is not yet supported"
+                )
+            # TODO as above for OLD objects, filtering for NEW objects will go 
here
+
+            # And print out diffs for the DataFrames. This in turn calls the 
index and column diff
+            # utility, if dataframe.diff() raises.
+            UnitTestBaseliner.diff_dfs(old_df_list, current_df_list)
+
+            # TODO when adding other supported object types, calculate diffs 
for them here.
+
+            # Raise to ensure the test fails and this change / regression gets 
investigated
+            raise ValueError("Dataframes / objects did not match baseline. 
Please investigate using above diffs")
+        else:
+            _logging.debug(f"Hashes matched for '{pickled_list_path}': 
{pickled_list_sha256}")
+            return True
+
+    @staticmethod
+    def diff_dfs(old_df_list: list[DataFrame], current_dfs_list: 
list[DataFrame]) -> None:
+
+        old_length = len(old_df_list)
+        current_length = len(current_dfs_list)
+        if old_length != current_length:
+            raise ValueError(
+                f"Unpickled DataFrame list had {old_length} elements, " 
f"whereas the current one has {current_length}"
+            )
+        for i in range(current_length):
+            old_df = old_df_list[i]
+            current_df = current_dfs_list[i]
+
+            _logging.info(f"Diffing DataFrame #{i}...")
+
+            # DF.equals() may be useful, but does not check that the 
row/column index datatypes are the same
+            _logging.info(f"DataFrame.equals(): {current_df.equals(old_df)}")
+
+            try:
+                _logging.info(f"current_dataframe.compare(old_dataframe): 
{current_df.compare(old_df)}")
+            except ValueError:
+                _logging.info(
+                    f"current_dataframe.compare(old_dataframe): FAILED! 
Indexes / columns likely differ. Running diff of those..."
+                )
+                UnitTestBaseliner.diff_indexes_and_columns(old_df, current_df)
+
+    @staticmethod
+    def diff_indexes_and_columns(existing_df: DataFrame, current_df: 
DataFrame) -> None:
+        # Utility function to output diffs of DataFrame indexes and columns, 
as DataFrame.compare() will not run if
+        # they differ.
+
+        # Handle diffing of indexes
+        existing_df_index = existing_df.index.to_list()
+        current_df_index = current_df.index.to_list()
+        index_diff = 
set(existing_df_index).symmetric_difference(current_df_index)
+        if existing_df_index != current_df_index:
+            if len(index_diff) == 0:  # Diff must've been in order, not values
+                _logging.info("Indexes differed in order, but not values. 
Outputting full indexes:")
+                _logging.info(f"Existing DF indexes: 
{str(existing_df.index.to_list())}")
+                _logging.info(f"Current DF indexes: 
{str(current_df.index.to_list())}")
+            else:
+                _logging.info(f"The following index values are in one DF but 
not the other: {str(index_diff)}")
+
+        # Handle diffing of columns
+        existing_df_colums = existing_df.columns.to_list()
+        current_df_columns = current_df.columns.to_list()
+
+        column_diff = 
set(existing_df_colums).symmetric_difference(current_df_columns)
+        if existing_df_colums != current_df_columns:
+            if len(column_diff) == 0:  # Diff must've been in order, not values
+                _logging.info("Columns differed in order, but not values. 
Outputting full column listing:")
+                _logging.info(f"Existing DF columns: 
{str(existing_df.columns.to_list())}")
+                _logging.info(f"Current DF columns: 
{str(current_df.columns.to_list())}")
+            else:
+                _logging.info(f"The following column names are in one DF but 
not the other: {str(column_diff)}")
+
+    # NOTE: for aggregate tests, the revised multi-dataframe functions above 
are suggested
+    @staticmethod
+    def pickle_and_sha256(obj: object) -> str:
+        return sha256(pickle.dumps(obj))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/gnssanalysis-0.0.59/tests/test_utils.py 
new/gnssanalysis-0.0.60/tests/test_utils.py
--- old/gnssanalysis-0.0.59/tests/test_utils.py 2026-01-30 06:09:38.000000000 
+0100
+++ new/gnssanalysis-0.0.60/tests/test_utils.py 2026-06-09 05:42:05.000000000 
+0200
@@ -1,8 +1,11 @@
 import logging
+import os
+import unittest
+from pandas import DataFrame
 from pyfakefs.fake_filesystem_unittest import TestCase
 from pathlib import Path
 
-from gnssanalysis.gn_utils import delete_entire_directory
+from gnssanalysis.gn_utils import UnitTestBaseliner, delete_entire_directory
 import gnssanalysis.gn_utils as ga_utils
 
 
@@ -64,3 +67,129 @@
 
         # Verify
         self.assertEqual(logger_not_output, None)
+
+
+class TestUnitTestBaseliner(unittest.TestCase):
+
+    def test_verify_refusal_in_wrong_mode(self):
+        mode_backup = UnitTestBaseliner.mode
+        try:
+            df = DataFrame(["a", "b", "c"])
+
+            # Baseline (do not commit uncommented!) Note: every function needs 
its own baseline, because the
+            # function name determines the filename, unless we override that.
+            # UnitTestBaseliner.mode = "baseline"
+            # UnitTestBaseliner.record_baseline([df])
+
+            # In baseline (write) mode, verify should be refused.
+            UnitTestBaseliner.mode = "baseline"
+
+            with self.assertWarns(Warning) as warning_assessor:
+                self.assertFalse(
+                    UnitTestBaseliner.verify([df]),
+                    "DF / object list verification should not succeed in 
'baseline' mode",
+                )
+            # Ensure the expected warning, and only that warning, was raised
+            captured_warnings = warning_assessor.warnings
+            self.assertEqual(
+                "Refusing to run verify method while not in verify mode. Set 
UnitTestBaseliner.mode = 'verify' first",
+                str(captured_warnings[0].message),
+            )
+            self.assertEqual(
+                len(captured_warnings),
+                1,
+                "Expected exactly 1 warning. Check what other warnings are 
being raised!",
+            )
+
+            # Should succeed in correct mode.
+            UnitTestBaseliner.mode = "verify"
+            self.assertTrue(
+                UnitTestBaseliner.verify([df]),
+                "DF / object list verification should succeed in 'verify' 
mode",
+            )
+        finally:
+            # Ensure flag reset to avoid impacts on other tests (across the 
whole suite)
+            UnitTestBaseliner.mode = mode_backup
+
+    def test_repeat_caller_rejection(self):
+        # These functions determine what files to write/read baselines from, 
based on the identity of the (test)
+        # function that called them. Therefore, calling twice from the same 
function would cause the *same baseline
+        # files* to be read/written for a different part of the unit test.
+        # That would have the effect of:
+        # - in write mode: overwriting the baseline file for a previous part 
of the test function.
+        # - in read mode: repeating verification of the same file against a 
different DF / object list (which would
+        #   likely fail).
+
+        # We're only testing it with the verify function below, but both 
verify and baseline functions use the same
+        # caller check logic, and store the caller record statically in a 
class variable. ?
+
+        df = DataFrame(["a", "b", "c"])
+
+        # Baseline (every function needs its own baseline, because the 
function name determines the filename,
+        # unless we override that)
+        # UnitTestBaseliner.mode = "baseline"
+        # UnitTestBaseliner.record_baseline([df])
+
+        self.assertTrue(
+            UnitTestBaseliner.verify([df]),
+            "DF / object list verification should succeed on *first* call from 
a function.",
+        )
+        with self.assertRaises(ValueError):
+            UnitTestBaseliner.verify([df])
+            self.fail("DF / object list verification should fail on 
*second*/repeated calls from a function.")
+
+    def test_duplicate_object_rejection(self):
+
+        # List to aggregate DFs / objects for hashing
+        objects_to_hash: list[object] = []
+
+        df = DataFrame(["a", "b", "c"])  # Let's call this Dataframe 'a'
+        objects_to_hash.extend([df])
+
+        # Overwrite local variable, as often happens in our unit tests
+        df = DataFrame(["b", "c", "d"])  # Let's call this Dataframe 'b'
+
+        # This might look questionable, but is ok, because we saved a 
reference to dataframe 'a' to the list,
+        # before overwriting local var 'df' to point at dataframe 'b'.
+        objects_to_hash.extend([df])
+
+        # Baseline this test (this should only be committed commented out!)
+        # UnitTestBaseliner.mode = "baseline"
+        # UnitTestBaseliner.record_baseline(objects_to_hash)
+
+        # Will return True if verification succeeded. False if baseline 
missing or mode != verify
+        self.assertTrue(
+            UnitTestBaseliner.verify(objects_to_hash),
+            "DF / object list verification should succeed here (unless 
baseline files are missing, or baselining has been turned on)",
+        )
+
+        # The local variable df still points to the same DF, so now the list 
contains [a,b,b]. This should be an error.
+        objects_to_hash.extend([df])
+        with self.assertRaises(ValueError):
+            UnitTestBaseliner.verify(objects_to_hash)
+
+    def test_caller_identity_fetch(self):
+        def wrapper_function():
+            class_name, func_name = 
UnitTestBaseliner.get_grandparent_caller_id()
+            self.assertEqual(class_name, "TestUnitTestBaseliner")
+            self.assertEqual(func_name, "test_caller_identity_fetch")
+
+        # We have to do this (create an extra stack frame) because the 
function looks for
+        # the *grandparent* caller, not parent caller.
+        wrapper_function()
+
+
+# For use with debugger
+# if __name__ == "__main__":
+
+#     logging.basicConfig(format="%(levelname)s: %(message)s")
+#     logger = logging.getLogger()
+#     logger.setLevel(logging.DEBUG)
+
+#     os.chdir("./tests")
+
+#     baseliner_tests = TestUnitTestBaseliner()
+#     baseliner_tests.test_duplicate_object_rejection()
+#     baseliner_tests.test_verify_refusal_in_wrong_mode()
+#     baseliner_tests.test_repeat_caller_rejection()
+#     baseliner_tests.test_caller_identity_fetch()
Binary files 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist
 and 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist
 differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
--- 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
   1970-01-01 01:00:00.000000000 +0100
+++ 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
   2026-06-09 05:42:05.000000000 +0200
@@ -0,0 +1 @@
+6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b
\ No newline at end of file
Binary files 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist
 and 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist
 differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
--- 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
      1970-01-01 01:00:00.000000000 +0100
+++ 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
      2026-06-09 05:42:05.000000000 +0200
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
\ No newline at end of file
Binary files 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist
 and 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist
 differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
--- 
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
 1970-01-01 01:00:00.000000000 +0100
+++ 
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
 2026-06-09 05:42:05.000000000 +0200
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
\ No newline at end of file

commit python-gnssanalysis for openSUSE:Factory

Reply via email to