Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package python-gnssanalysis for
openSUSE:Factory checked in at 2026-06-09 14:27:48
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-gnssanalysis (Old)
and /work/SRC/openSUSE:Factory/.python-gnssanalysis.new.2375 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-gnssanalysis"
Tue Jun 9 14:27:48 2026 rev:4 rq:1358119 version:0.0.60
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-gnssanalysis/python-gnssanalysis.changes
2026-04-28 12:01:33.002422212 +0200
+++
/work/SRC/openSUSE:Factory/.python-gnssanalysis.new.2375/python-gnssanalysis.changes
2026-06-09 14:30:11.857133034 +0200
@@ -1,0 +2,14 @@
+Tue Jun 9 06:09:26 UTC 2026 - Dirk Stoecker <[email protected]>
+
+- update to 0.0.60
+ * Introduced a framework for hashing and testing DataFrames
+ against a baseline
+ * Introduced ability to enter EarthData credentials (for CDDIS
+ downloads) via environment variables rather than the .netrc file
+ * Fixed issue with downloading IGS FIN SP3 files from CDDIS -
+ changed default to 15M for sampling rate rather than 05M
+ * NPI-4453 Framework for DataFrame hashing & test baselining
+ * NPI-4495 Earthdata creds via env var
+ * Remove IGS from searching for 05M SP3 files
+
+-------------------------------------------------------------------
Old:
----
gnssanalysis-0.0.59.tar.gz
New:
----
gnssanalysis-0.0.60.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-gnssanalysis.spec ++++++
--- /var/tmp/diff_new_pack.V5Z166/_old 2026-06-09 14:30:13.497201089 +0200
+++ /var/tmp/diff_new_pack.V5Z166/_new 2026-06-09 14:30:13.505201421 +0200
@@ -21,7 +21,7 @@
%{?sle15_python_module_pythons}
%define pyname gnssanalysis
Name: python-%{pyname}
-Version: 0.0.59
+Version: 0.0.60
Release: 0
Summary: GNSS-related functionality from Geoscience Australia
License: BSD-3-Clause
@@ -48,6 +48,7 @@
BuildRequires: fdupes
BuildRequires: python-rpm-macros
BuildArch: noarch
+Requires: alts
Requires: python-boto3
Requires: python-click
Requires: python-hatanaka
@@ -64,7 +65,6 @@
Requires: python-tqdm
Requires: python-typing_extensions
Requires: python-unlzw3
-Requires: alts
%python_subpackages
%description
++++++ gnssanalysis-0.0.59.tar.gz -> gnssanalysis-0.0.60.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/gnssanalysis-0.0.59/gnssanalysis/_version.py
new/gnssanalysis-0.0.60/gnssanalysis/_version.py
--- old/gnssanalysis-0.0.59/gnssanalysis/_version.py 2026-01-30
06:09:38.000000000 +0100
+++ new/gnssanalysis-0.0.60/gnssanalysis/_version.py 2026-06-09
05:42:05.000000000 +0200
@@ -25,9 +25,9 @@
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 0.0.59)"
- git_full = "4c0deba3bdf05c5c2dcae79123064b51089a66a3"
- git_date = "2026-01-30 16:09:38 +1100"
+ git_refnames = " (HEAD -> main, tag: 0.0.60)"
+ git_full = "d0c36425212b0b8f9628d93dae3dd96da975eeda"
+ git_date = "2026-06-09 13:42:05 +1000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/gnssanalysis-0.0.59/gnssanalysis/gn_download.py
new/gnssanalysis-0.0.60/gnssanalysis/gn_download.py
--- old/gnssanalysis-0.0.59/gnssanalysis/gn_download.py 2026-01-30
06:09:38.000000000 +0100
+++ new/gnssanalysis-0.0.60/gnssanalysis/gn_download.py 2026-06-09
05:42:05.000000000 +0200
@@ -96,7 +96,7 @@
def get_earthdata_credentials(username: Optional[str] = None, password:
Optional[str] = None) -> Tuple[str, str]:
"""
- Get NASA Earthdata credentials from .netrc file or direct parameters.
+ Get NASA Earthdata credentials from direct parameters, env vars, or .netrc
file.
:param Optional[str] username: Directly provided username (highest
priority)
:param Optional[str] password: Directly provided password (highest
priority)
:return Tuple[str, str]: Username and password tuple
@@ -106,9 +106,25 @@
if username and password:
logging.debug("Using directly provided NASA Earthdata credentials")
return username, password
- # Priority 2: Try to read from .netrc file
+
+ # Priority 2: Try to read from env vars
+ logging.debug("Attempting to pick up NASA Earthdata credentials from env
vars...")
+ if all(env in _os.environ for env in ["EARTHDATA_USERNAME",
"EARTHDATA_PASSWORD"]):
+
+ env_user = _os.environ["EARTHDATA_USERNAME"]
+ env_pass = _os.environ["EARTHDATA_PASSWORD"]
+
+ if len(env_user) == 0 or len(env_pass) == 0:
+ raise ValueError("NASA Earthdata username or password found in env
var appears to be empty")
+
+ logging.debug("NASA Earthdata credentials successfully read from env
vars")
+ return _os.environ["EARTHDATA_USERNAME"],
_os.environ["EARTHDATA_PASSWORD"]
+ else:
+ logging.debug("Env vars EARTHDATA_USERNAME or EARTHDATA_PASSWORD were
not set. Trying netrc...")
+
+ # Priority 3: Try to read from .netrc file
try:
- netrc_path = _Path.home() / '.netrc'
+ netrc_path = _Path.home() / ".netrc"
if netrc_path.exists():
logging.debug(f"Found .netrc at {netrc_path}")
netrc_auth = _netrc.netrc()
@@ -121,8 +137,11 @@
except Exception as e:
logging.debug(f"Error reading .netrc: {e}")
# No credentials available
- raise ValueError("No NASA Earthdata credentials available. Provide
username/password directly "
- f"or set up .netrc file with entry for
'{EARTHDATA_URL}'.")
+ raise ValueError(
+ "No NASA Earthdata credentials available. Provide username/password
directly, "
+ "set env vars EARTHDATA_USERNAME and EARTHDATA_PASSWORD, "
+ f"or set up .netrc file with entry for '{EARTHDATA_URL}'."
+ )
def upload_with_chunksize_and_meta(
@@ -332,7 +351,7 @@
"BIA": "01D",
"SP3": {
("COD", "GFZ", "GRG", "IAC", "JAX", "MIT", "WUM"): "05M",
- ("ESA", "IGS"): {"FIN": "05M", "RAP": "15M", None: "15M"},
+ ("ESA"): {"FIN": "05M", "RAP": "15M", None: "15M"},
(): "15M",
},
"CLK": {
@@ -839,7 +858,7 @@
response.raise_for_status()
# Download the file
- with open(download_filepath, 'wb') as f:
+ with open(download_filepath, "wb") as f:
for chunk in response.iter_content(chunk_size=MB):
if chunk:
f.write(chunk)
@@ -860,7 +879,7 @@
if download_filepath.is_file():
download_filepath.unlink()
raise
- backoff = _random.uniform(0.0, 2.0 ** retries)
+ backoff = _random.uniform(0.0, 2.0**retries)
_warnings.warn(
f"Error downloading {filename}: {e} " f"(retry
{retries}/{max_retries}, backoff {backoff:.1f}s)"
)
@@ -902,9 +921,7 @@
# Get credentials once for all downloads
try:
- earthdata_username, earthdata_password = get_earthdata_credentials(
- username=username, password=password
- )
+ earthdata_username, earthdata_password =
get_earthdata_credentials(username=username, password=password)
except ValueError as e:
logging.error(f"Failed to obtain NASA Earthdata credentials: {e}")
raise
@@ -916,7 +933,7 @@
url_folder=url_folder,
output_folder=output_folder,
username=earthdata_username,
- password=earthdata_password
+ password=earthdata_password,
)
with _concurrent.futures.ThreadPoolExecutor() as executor:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/gnssanalysis-0.0.59/gnssanalysis/gn_utils.py
new/gnssanalysis-0.0.60/gnssanalysis/gn_utils.py
--- old/gnssanalysis-0.0.59/gnssanalysis/gn_utils.py 2026-01-30
06:09:38.000000000 +0100
+++ new/gnssanalysis-0.0.60/gnssanalysis/gn_utils.py 2026-06-09
05:42:05.000000000 +0200
@@ -1,15 +1,24 @@
+import hashlib
+import inspect
import logging as _logging
import os as _os
+import pickle
import sys as _sys
import pathlib as _pathlib
from time import perf_counter
+import warnings
import click as _click
-from typing import Union
+from pandas import DataFrame
+from typing import Literal, Optional, Union
from gnssanalysis.enum_meta_properties import EnumMetaProperties
+# Two options, as a convenience feature to allow invoking from the project
root or the tests subdir.
+UNITTEST_BASELINE_FILES_ROOT_RELATIVE =
_pathlib.Path("./tests/unittest_baselines")
+UNITTEST_BASELINE_FILES_TESTS_RELATIVE = _pathlib.Path("./unittest_baselines")
+
class StrictMode(metaclass=EnumMetaProperties):
name: str
@@ -986,3 +995,413 @@
)
if self.print_time:
print(self.readout)
+
+
+def sha256(bytes_to_hash: bytes) -> str:
+ """
+ Convenience wrapper to quickly call hashlib.sha256 and return a hex digest
string
+ """
+ return hashlib.sha256(bytes_to_hash).hexdigest()
+
+
+class UnitTestBaseliner:
+
+ mode: Literal["baseline", "verify"] = "verify"
+
+ # Unpickling is off by default for security reasons (arbitrary code
injection via serialised objects)
+ # Enable temporarily when needed to debug a test regression / change, and
ensure input data is trusted.
+ enable_unpickling: bool = False # DO NOT commit changes to this
+
+ # Record of (test) functions which have called either baseline or verify
functions.
+ # If the same function calls twice, this indicates multiple data sets are
being stored / checked, under a single
+ # name. This will cause the last to overwrite all previous, and we will
only test that last one.
+ caller_record: set[str] = set()
+
+ @staticmethod
+ def get_paths_for_pickle_and_hash(
+ filename_prefix: str,
+ subdir: Optional[_pathlib.Path] = None,
+ ) -> tuple[_pathlib.Path, _pathlib.Path]:
+
+ cwd: str = _pathlib.Path.cwd().as_posix()
+ # The following is a quality of life feature, allowing test invocation
from either:
+ # - the project root dir --> python -m unittest discover -v -s tests
+ # - the tests subdir --> python -m unittest discover -v
+ if cwd.endswith("/gnssanalysis"):
+ parent_dir = UNITTEST_BASELINE_FILES_ROOT_RELATIVE
+ elif cwd.endswith("/gnssanalysis/tests"):
+ parent_dir = UNITTEST_BASELINE_FILES_TESTS_RELATIVE
+ else:
+ raise ValueError(
+ f"UnitTestBaseliner invoked in invalid workdir: '{cwd}'. "
+ "It should be run within the top level gnssanalysis project
dir (preferred), or the tests subdir"
+ )
+
+ if not parent_dir.is_dir():
+ raise ValueError(f"Test baselining dir not found at:
'{parent_dir.as_posix()}'")
+
+ target_dir = parent_dir / subdir if subdir is not None else parent_dir
+ if not target_dir.is_dir():
+ # Create directory (fail if parent dirs don't exist). We take this
more conservative approach because if
+ # the baseline directory doesn't exist *where we are looking*,
that may indicate our workdir is wrong
+ # and we should stop.
+ target_dir.mkdir()
+
+ pickled_list_path =
_pathlib.Path(f"{target_dir}/{filename_prefix}.pickledlist")
+ pickled_list_hash_path =
_pathlib.Path(f"{target_dir}/{filename_prefix}.pickledlist_sha256")
+ return (pickled_list_path, pickled_list_hash_path)
+
+ @staticmethod
+ def get_grandparent_caller_id() -> tuple[str, str]:
+ # This function uses Python frame inspection to determine the *2nd
level* caller's name. I.e. finds
+ # the grandparent class and function on the stack.
+
+ # --- AI declaration ---: This function leverages suggestions from
Google Gemini.
+
+ # For example, if this is *called by* a function which was itself
called by TestClk.test_diff_clk(), the
+ # return would be: (TestClk, test_diff_clk)
+
+ # Note, because navigation is simply a question of how far to walk the
stack, it is important to be mindful
+ # of where you call this from!
+ # I.e. don't call it from within a function which in turn is called by
+ # something, the *caller* of which you want to know about... that
would be frame -3, not frame -2.
+
+ # The following depicts the typical frame structure of intended usage:
+ # TestClk.test_diff_clk() -> UnitTestBaseliner.verify() ->
get_caller_names()
+ # ^Frame -2 ^Frame -1 ^ current
frame
+ # We want the name of frame -2, our 'grandparent'.
+
+ # Set up try block to ensure we delete the frame ref created by
calling this function
+ try:
+ caller_frame = None
+ # The calling function's calling function frame. I.e the frame of
the grandparent function.
+ # We have to step back two, because the first frame is us, the
next is the function leveraging us,
+ # and the one after that is whatever called *that* function.
+
+ # Leveraging a lot of linter ignores here, as almost everything in
these chains can return None, making
+ # it easier and much simpler, to just catch the exceptions.
+ callers_callers_frame = inspect.currentframe().f_back.f_back #
type: ignore
+ func_name = callers_callers_frame.f_code.co_name # type: ignore
+ if "self" in callers_callers_frame.f_locals: # type: ignore
+ calling_class_name =
callers_callers_frame.f_locals["self"].__class__.__name__ # type: ignore
+ elif "cls" in callers_callers_frame.f_locals: # type: ignore
+ calling_class_name =
callers_callers_frame.f_locals["cls"].__class__.__name__ # type: ignore
+ else:
+ raise AttributeError("Class not found via either self or cls")
+
+ # If nothing has raised an AttributeError yet, we have a class and
function name.
+ # Check it's not accidentally us:
+ if calling_class_name == __class__.__name__:
+ raise ValueError(
+ f"Calling error: somehow, the grandparent of
get_caller_pretty_string() was "
+ f"us {__class__.__name__}. That shouldn't happen. Got:
{calling_class_name}"
+ )
+ # TODO can we check if it's a test, or lives in a 'tests' package?
+ # return f"{calling_class_name}.{func_name}"
+ return (calling_class_name, func_name)
+
+ except AttributeError as a_ex:
+ raise ValueError(
+ f"Failed to find name of caller. Please set filename_prefix
and subdir explicity. Exception: {a_ex}"
+ )
+
+ finally:
+ del caller_frame # Avoid creating ref cycle and leaking memory.
I.e. help the garbage collector.
+ # See doc here:
https://docs.python.org/3/library/inspect.html#inspect.Traceback.positions
+
+ @staticmethod
+ def ensure_unique_objects(objects: list[object]) -> None:
+
+ _logging.debug("Verifying no duplicate object references in object
list to hash")
+
+ unique_addresses: set[int] = set([id(obj) for obj in objects])
+
+ addr_count = len(unique_addresses)
+ obj_count = len(objects)
+ if addr_count != obj_count:
+ raise ValueError(
+ f"Count of unique addresses ({addr_count}) didn't match length
of object list ({obj_count}). "
+ "Two references to the same DF / other object may have been
passed, please investigate!"
+ )
+
+ @staticmethod
+ def create_baseline( # Was baseline_pickled_df_list_and_hash()
+ current_object_list: list[object],
+ # These are used to describe the calling class and function, and are
inferred automatically. If needed they
+ # can be explicitly set here:
+ subdir: Optional[_pathlib.Path] = None,
+ filename_prefix: Optional[str] = None,
+ ) -> None:
+
+ if UnitTestBaseliner.mode != "baseline":
+ raise ValueError(
+ "Refusing to create baseline of pickled DFs / objects and
hash, while not in 'baseline' mode. "
+ "Set UnitTestBaseliner.mode = 'baseline' first"
+ )
+
+ if filename_prefix is None:
+ # Try to determine filename prefix from class name and function
which is calling us...
+ caller_class, caller_func =
UnitTestBaseliner.get_grandparent_caller_id()
+ _logging.debug(
+ f"No filename_prefix provided. "
+ f"Using grandparent class and func (found using frame
inspection): {caller_class}, {caller_func}"
+ )
+ filename_prefix = caller_func
+ subdir = _pathlib.Path(caller_class)
+
+ caller_id = f"{caller_class}.{caller_func}"
+ else:
+ caller_id = filename_prefix
+
+ # Check if we've been called before by this class,function pair (i.e.
caller_id).
+ # If this is not our first call, continuing will overwrite previous
results. So we raise.
+ if caller_id in UnitTestBaseliner.caller_record:
+ raise ValueError(
+ f"Multiple calls from '{caller_id}'! Please consolidate your
dataframes / objects to verify, and "
+ "only pass one list per test function / filename_prefix."
+ )
+ UnitTestBaseliner.caller_record.add(caller_id)
+
+ pickled_objects_path, aggregate_sha256_path =
UnitTestBaseliner.get_paths_for_pickle_and_hash(
+ filename_prefix, subdir=subdir
+ )
+
+ # Safety check that we did not get two references to the same
DataFrame / object in the list
+ UnitTestBaseliner.ensure_unique_objects(current_object_list)
+
+ # Structure here is:
+ # pickled_list: bytes -> created from an array of DataFrames /
objects. Pickled into a single bytes object.
+ # pickled_list_sha256: str -> sha256 hash of the above pickled
DataFrame / object list.
+
+ current_df_list: list[DataFrame] = [df for df in current_object_list
if isinstance(df, DataFrame)]
+ if len(current_object_list) > len(current_df_list):
+ warnings.warn(
+ "Creating a unittest baseline containing objects other than
DataFrames! This can be hash "
+ "verified, but verify() will crash if any changes are
detected. Please implement support for "
+ "other required object types!"
+ )
+ # TODO other object support to be added here
+
+ pickled_list: bytes = pickle.dumps(current_object_list)
+ pickled_list_sha256: str = hashlib.sha256(pickled_list).hexdigest()
+
+ warnings.warn(
+ "Baselining should only be done supervised (in a dev environment).
"
+ "If you see this message in a pipeline run, something needs
fixing!"
+ )
+ _logging.debug(f"About to write baseline:
'{pickled_objects_path.as_posix()}': {pickled_list_sha256}...")
+
+ with open(aggregate_sha256_path, "wb") as hash_file:
+ hash_file.write(pickled_list_sha256.encode())
+ with open(pickled_objects_path, "wb") as pickled_objects_file:
+ pickled_objects_file.write(pickled_list)
+
+ _logging.info(
+ "TEST BASELINED -->> **Please ensure you commit both pickle and
hash files with your changes**: "
+ f"'{pickled_objects_path.as_posix()}': {pickled_list_sha256}.\n"
+ )
+
+ @staticmethod
+ def verify( # Was create_and_verify_pickled_df_list()
+ current_object_list: list[object],
+ # parent_dir: _pathlib.Path =
BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
+ # Option to strictly enforce that a baseline must exist for anything
this function is invoked to check:
+ raise_for_missing_baseline: bool = False,
+ raise_rather_than_continue_for_incorrect_mode: bool = False,
+ # The expected pickled list hash will be read from disk, at a path
constructed using the name of the
+ # calling class and function. While it should not be necessary, you
can optionally override the expected hash:
+ expected_pickled_list_sha256: Optional[str] = None,
+ # These are used to describe the calling class and function, and are
inferred automatically. If needed they
+ # can be explicitly set here:
+ subdir: Optional[_pathlib.Path] = None,
+ filename_prefix: Optional[str] = None,
+ ) -> bool:
+ # Return options:
+ # - True if verification successful.
+ # - False if baseline incomplete or missing (unable to verify). OR, if
mode != 'verify'
+ # NOTE: Raises for verification failed.
+
+ if UnitTestBaseliner.mode != "verify":
+
+ # TODO could change this to just politely state that it is
skipping as in baseline mode. But we don't
+ # want to leave things in baseline mode, so...? Is failing tests
sufficient? Hopefully.
+ if raise_rather_than_continue_for_incorrect_mode:
+ raise ValueError(
+ "Refusing to run verify method while not in verify mode. "
+ "Set UnitTestBaseliner.mode = 'verify' first"
+ )
+ warnings.warn(
+ "Refusing to run verify method while not in verify mode. "
"Set UnitTestBaseliner.mode = 'verify' first"
+ )
+ return False
+
+ # Verify we didn't get passed multiple, overwritten copies of the same
reference
+ UnitTestBaseliner.ensure_unique_objects(current_object_list)
+
+ if filename_prefix is None:
+ # Try to determine filename prefix from class name and function
which is calling us...
+ caller_class, caller_func =
UnitTestBaseliner.get_grandparent_caller_id()
+ _logging.debug(
+ f"No filename_prefix provided. "
+ f"Using grandparent class and func (found using frame
inspection): {caller_class}, {caller_func}"
+ )
+ filename_prefix = caller_func
+ subdir = _pathlib.Path(caller_class)
+
+ caller_id = f"{caller_class}.{caller_func}"
+ else:
+ caller_id = filename_prefix
+
+ # Check if we've been called before by this class,function pair (i.e.
caller_id).
+ if caller_id in UnitTestBaseliner.caller_record:
+ raise ValueError(
+ f"Multiple calls from '{caller_id}'! Please consolidate your
dataframes / objects to validate, and "
+ "only pass one list per test function / filename_prefix."
+ )
+ UnitTestBaseliner.caller_record.add(caller_id)
+
+ # Determine paths on disk...
+ pickled_list_path, pickled_list_hash_path =
UnitTestBaseliner.get_paths_for_pickle_and_hash(
+ filename_prefix, subdir=subdir
+ )
+
+ # Check if pickled list or hash exist on disk
+ pickle_exists = pickled_list_path.exists()
+ hash_exists = pickled_list_hash_path.exists()
+
+ if hash_exists == False:
+ if raise_for_missing_baseline:
+ raise ValueError(
+ f"Cannot verify DFs / objects against baseline (hash file:
{'present' if hash_exists else 'missing'}, "
+ f"pickled list file: {'present' if pickle_exists else
'missing'}) "
+ f"for '{caller_id}'."
+ )
+ warnings.warn(
+ f"Cannot verify DFs / objects against baseline (hash file:
{'present' if hash_exists else 'missing'}, "
+ f"pickled list file: {'present' if pickle_exists else
'missing'}) "
+ f"for '{caller_id}'."
+ )
+ return False
+
+ if expected_pickled_list_sha256 is None: # Expected hash not
provided, load it from disk
+ # Load old aggregate hash (of pickled list)...
+ _logging.debug(f"No expected hash value provided for
'{pickled_list_path}', attempting to load...")
+ with open(pickled_list_hash_path, "rb") as pickled_list_hash_file:
+ expected_pickled_list_sha256 =
pickled_list_hash_file.read().decode()
+
+ # Data ready, now do comparison
+ # Generate pickled list and aggregate hash
+ pickled_list = pickle.dumps(current_object_list)
+ pickled_list_sha256 = sha256(pickled_list)
+
+ if pickled_list_sha256 != expected_pickled_list_sha256:
+ _logging.debug(
+ f"Hashes did not match for '{pickled_list_path}'. Expected:
{expected_pickled_list_sha256} Actual: {pickled_list_sha256}"
+ )
+ # Load old DataFrames / other objects (pickled list)...
+ with open(pickled_list_path, "rb") as pickled_list_hash_file:
+ pickled_list = pickled_list_hash_file.read()
+
+ # Unpickle if the safety is turned off
+ # CAUTION: deserialising can present arbitrary code execution
potential. Ensure the data passed in is trustworthy.
+ if UnitTestBaseliner.enable_unpickling != True:
+ raise ValueError(
+ "Cannot load baselined DataFrames / objects from pickle
for analysis as unpickling is "
+ "off (default for security). Temporarily set
UnitTestBaseliner.enable_unpickling = True to "
+ "allow deserialisation of old DFs / objects from disk."
+ )
+ warnings.warn(
+ "Unpickling object list from unittest baseline, to create diff
with current results. This may "
+ "present a security risk, and should NOT be left enabled when
not needed. Please ensure "
+ "UnitTestBaseliner.enable_unpickling defaults to False"
+ )
+ unpickled_object_list: list[object] = pickle.loads(pickled_list)
+
+ # Filter OLD (baseline) object list by datatype
+ old_df_list: list[DataFrame] = [df for df in unpickled_object_list
if isinstance(df, DataFrame)]
+ if len(unpickled_object_list) > len(old_df_list):
+ raise NotImplementedError(
+ "Outputting diffs for non-DataFrame objects during
verification, is not yet supported"
+ )
+ # TODO filtering to extract other supported datatypes will go here
in future, rather than the above exception
+
+ # Filter NEW (being verified) object list by datatype
+ current_df_list: list[DataFrame] = [df for df in
current_object_list if isinstance(df, DataFrame)]
+ if len(current_object_list) > len(current_df_list):
+ raise NotImplementedError(
+ "Outputting diffs for non-DataFrame objects during
verification, is not yet supported"
+ )
+ # TODO as above for OLD objects, filtering for NEW objects will go
here
+
+ # And print out diffs for the DataFrames. This in turn calls the
index and column diff
+ # utility, if dataframe.diff() raises.
+ UnitTestBaseliner.diff_dfs(old_df_list, current_df_list)
+
+ # TODO when adding other supported object types, calculate diffs
for them here.
+
+ # Raise to ensure the test fails and this change / regression gets
investigated
+ raise ValueError("Dataframes / objects did not match baseline.
Please investigate using above diffs")
+ else:
+ _logging.debug(f"Hashes matched for '{pickled_list_path}':
{pickled_list_sha256}")
+ return True
+
+ @staticmethod
+ def diff_dfs(old_df_list: list[DataFrame], current_dfs_list:
list[DataFrame]) -> None:
+
+ old_length = len(old_df_list)
+ current_length = len(current_dfs_list)
+ if old_length != current_length:
+ raise ValueError(
+ f"Unpickled DataFrame list had {old_length} elements, "
f"whereas the current one has {current_length}"
+ )
+ for i in range(current_length):
+ old_df = old_df_list[i]
+ current_df = current_dfs_list[i]
+
+ _logging.info(f"Diffing DataFrame #{i}...")
+
+ # DF.equals() may be useful, but does not check that the
row/column index datatypes are the same
+ _logging.info(f"DataFrame.equals(): {current_df.equals(old_df)}")
+
+ try:
+ _logging.info(f"current_dataframe.compare(old_dataframe):
{current_df.compare(old_df)}")
+ except ValueError:
+ _logging.info(
+ f"current_dataframe.compare(old_dataframe): FAILED!
Indexes / columns likely differ. Running diff of those..."
+ )
+ UnitTestBaseliner.diff_indexes_and_columns(old_df, current_df)
+
+ @staticmethod
+ def diff_indexes_and_columns(existing_df: DataFrame, current_df:
DataFrame) -> None:
+ # Utility function to output diffs of DataFrame indexes and columns,
as DataFrame.compare() will not run if
+ # they differ.
+
+ # Handle diffing of indexes
+ existing_df_index = existing_df.index.to_list()
+ current_df_index = current_df.index.to_list()
+ index_diff =
set(existing_df_index).symmetric_difference(current_df_index)
+ if existing_df_index != current_df_index:
+ if len(index_diff) == 0: # Diff must've been in order, not values
+ _logging.info("Indexes differed in order, but not values.
Outputting full indexes:")
+ _logging.info(f"Existing DF indexes:
{str(existing_df.index.to_list())}")
+ _logging.info(f"Current DF indexes:
{str(current_df.index.to_list())}")
+ else:
+ _logging.info(f"The following index values are in one DF but
not the other: {str(index_diff)}")
+
+ # Handle diffing of columns
+ existing_df_colums = existing_df.columns.to_list()
+ current_df_columns = current_df.columns.to_list()
+
+ column_diff =
set(existing_df_colums).symmetric_difference(current_df_columns)
+ if existing_df_colums != current_df_columns:
+ if len(column_diff) == 0: # Diff must've been in order, not values
+ _logging.info("Columns differed in order, but not values.
Outputting full column listing:")
+ _logging.info(f"Existing DF columns:
{str(existing_df.columns.to_list())}")
+ _logging.info(f"Current DF columns:
{str(current_df.columns.to_list())}")
+ else:
+ _logging.info(f"The following column names are in one DF but
not the other: {str(column_diff)}")
+
+ # NOTE: for aggregate tests, the revised multi-dataframe functions above
are suggested
+ @staticmethod
+ def pickle_and_sha256(obj: object) -> str:
+ return sha256(pickle.dumps(obj))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/gnssanalysis-0.0.59/tests/test_utils.py
new/gnssanalysis-0.0.60/tests/test_utils.py
--- old/gnssanalysis-0.0.59/tests/test_utils.py 2026-01-30 06:09:38.000000000
+0100
+++ new/gnssanalysis-0.0.60/tests/test_utils.py 2026-06-09 05:42:05.000000000
+0200
@@ -1,8 +1,11 @@
import logging
+import os
+import unittest
+from pandas import DataFrame
from pyfakefs.fake_filesystem_unittest import TestCase
from pathlib import Path
-from gnssanalysis.gn_utils import delete_entire_directory
+from gnssanalysis.gn_utils import UnitTestBaseliner, delete_entire_directory
import gnssanalysis.gn_utils as ga_utils
@@ -64,3 +67,129 @@
# Verify
self.assertEqual(logger_not_output, None)
+
+
+class TestUnitTestBaseliner(unittest.TestCase):
+
+ def test_verify_refusal_in_wrong_mode(self):
+ mode_backup = UnitTestBaseliner.mode
+ try:
+ df = DataFrame(["a", "b", "c"])
+
+ # Baseline (do not commit uncommented!) Note: every function needs
its own baseline, because the
+ # function name determines the filename, unless we override that.
+ # UnitTestBaseliner.mode = "baseline"
+ # UnitTestBaseliner.record_baseline([df])
+
+ # In baseline (write) mode, verify should be refused.
+ UnitTestBaseliner.mode = "baseline"
+
+ with self.assertWarns(Warning) as warning_assessor:
+ self.assertFalse(
+ UnitTestBaseliner.verify([df]),
+ "DF / object list verification should not succeed in
'baseline' mode",
+ )
+ # Ensure the expected warning, and only that warning, was raised
+ captured_warnings = warning_assessor.warnings
+ self.assertEqual(
+ "Refusing to run verify method while not in verify mode. Set
UnitTestBaseliner.mode = 'verify' first",
+ str(captured_warnings[0].message),
+ )
+ self.assertEqual(
+ len(captured_warnings),
+ 1,
+ "Expected exactly 1 warning. Check what other warnings are
being raised!",
+ )
+
+ # Should succeed in correct mode.
+ UnitTestBaseliner.mode = "verify"
+ self.assertTrue(
+ UnitTestBaseliner.verify([df]),
+ "DF / object list verification should succeed in 'verify'
mode",
+ )
+ finally:
+ # Ensure flag reset to avoid impacts on other tests (across the
whole suite)
+ UnitTestBaseliner.mode = mode_backup
+
+ def test_repeat_caller_rejection(self):
+ # These functions determine what files to write/read baselines from,
based on the identity of the (test)
+ # function that called them. Therefore, calling twice from the same
function would cause the *same baseline
+ # files* to be read/written for a different part of the unit test.
+ # That would have the effect of:
+ # - in write mode: overwriting the baseline file for a previous part
of the test function.
+ # - in read mode: repeating verification of the same file against a
different DF / object list (which would
+ # likely fail).
+
+ # We're only testing it with the verify function below, but both
verify and baseline functions use the same
+ # caller check logic, and store the caller record statically in a
class variable. ?
+
+ df = DataFrame(["a", "b", "c"])
+
+ # Baseline (every function needs its own baseline, because the
function name determines the filename,
+ # unless we override that)
+ # UnitTestBaseliner.mode = "baseline"
+ # UnitTestBaseliner.record_baseline([df])
+
+ self.assertTrue(
+ UnitTestBaseliner.verify([df]),
+ "DF / object list verification should succeed on *first* call from
a function.",
+ )
+ with self.assertRaises(ValueError):
+ UnitTestBaseliner.verify([df])
+ self.fail("DF / object list verification should fail on
*second*/repeated calls from a function.")
+
+ def test_duplicate_object_rejection(self):
+
+ # List to aggregate DFs / objects for hashing
+ objects_to_hash: list[object] = []
+
+ df = DataFrame(["a", "b", "c"]) # Let's call this Dataframe 'a'
+ objects_to_hash.extend([df])
+
+ # Overwrite local variable, as often happens in our unit tests
+ df = DataFrame(["b", "c", "d"]) # Let's call this Dataframe 'b'
+
+ # This might look questionable, but is ok, because we saved a
reference to dataframe 'a' to the list,
+ # before overwriting local var 'df' to point at dataframe 'b'.
+ objects_to_hash.extend([df])
+
+ # Baseline this test (this should only be committed commented out!)
+ # UnitTestBaseliner.mode = "baseline"
+ # UnitTestBaseliner.record_baseline(dfs_to_hash)
+
+ # Will return True if verification succeeded. False if baseline
missing or mode != verify
+ self.assertTrue(
+ UnitTestBaseliner.verify(objects_to_hash),
+ "DF / object list verification should succeed here (unless
baseline files are missing, or baselining has been turned on)",
+ )
+
+ # The local variable df still points to the same DF, so now the list
contains [a,b,b]. This should be an error.
+ objects_to_hash.extend([df])
+ with self.assertRaises(ValueError):
+ UnitTestBaseliner.verify(objects_to_hash)
+
+ def test_caller_identity_fetch(self):
+ def wrapper_function():
+ class_name, func_name =
UnitTestBaseliner.get_grandparent_caller_id()
+ self.assertEqual(class_name, "TestUnitTestBaseliner")
+ self.assertEqual(func_name, "test_caller_identity_fetch")
+
+ # We have to do this (create an extra stack frame) because the
function looks for
+ # the *grandparent* caller, not parent caller.
+ wrapper_function()
+
+
+# For use with debugger
+# if __name__ == "__main__":
+
+# logging.basicConfig(format="%(levelname)s: %(message)s")
+# logger = logging.getLogger()
+# logger.setLevel(logging.DEBUG)
+
+# os.chdir("./tests")
+
+# baseliner_tests = TestUnitTestBaseliner()
+# baseliner_tests.test_duplicate_object_rejection()
+# baseliner_tests.test_verify_refusal_in_wrong_mode
+# baseliner_tests.test_repeat_caller_rejection()
+# baseliner_tests.test_caller_identity_fetch()
Binary files
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist
and
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist
differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
---
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
1970-01-01 01:00:00.000000000 +0100
+++
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
2026-06-09 05:42:05.000000000 +0200
@@ -0,0 +1 @@
+6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b
\ No newline at end of file
Binary files
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist
and
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist
differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
---
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
1970-01-01 01:00:00.000000000 +0100
+++
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
2026-06-09 05:42:05.000000000 +0200
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
\ No newline at end of file
Binary files
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist
and
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist
differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
---
old/gnssanalysis-0.0.59/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
1970-01-01 01:00:00.000000000 +0100
+++
new/gnssanalysis-0.0.60/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
2026-06-09 05:42:05.000000000 +0200
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
\ No newline at end of file