[M] Change in pysim[master]: pySim.esim.saip: Implement optimized file content encoding

laforge Wed, 28 Jan 2026 02:03:55 -0800

laforge has submitted this change. ( 
https://gerrit.osmocom.org/c/pysim/+/38014?usp=email )


Change subject: pySim.esim.saip: Implement optimized file content encoding
......................................................................

pySim.esim.saip: Implement optimized file content encoding

Make sure we make use of the fill pattern when encoding file contents:
Only encode the differences to the fill pattern of the file, in order
to reduce the profile download size.

Change-Id: I61e4a5e04beba5c9092979fc546292d5ef3d7aad
---
M pySim/esim/saip/__init__.py
M tests/unittests/test_esim_saip.py
2 files changed, 86 insertions(+), 6 deletions(-)

Approvals:
  dexter: Looks good to me, but someone else must approve
  laforge: Looks good to me, approved
  Jenkins Builder: Verified




diff --git a/pySim/esim/saip/__init__.py b/pySim/esim/saip/__init__.py
index 6b6c01c..51b9d1f 100644
--- a/pySim/esim/saip/__init__.py
+++ b/pySim/esim/saip/__init__.py
@@ -21,6 +21,8 @@
 import os
 from typing import Tuple, List, Optional, Dict, Union
 from collections import OrderedDict
+from difflib import SequenceMatcher, Match
+
 import asn1tools
 import zipfile
 from pySim import javacard
@@ -44,6 +46,29 @@

 logger = logging.getLogger(__name__)

+class NonMatch(Match):
+    """Representing a contiguous non-matching block of data; the opposite of difflib.Match"""
+    @classmethod
+    def from_matchlist(cls, l: List[Match], size:int) -> List['NonMatch']:
+        """Build a list of non-matching blocks of data from its inverse (list of matching blocks).
+        The caller must ensure that the input list is ordered, non-overlapping and only contains
+        matches at equal offsets in a and b.  'size' is the end offset up to which gaps are reported."""
+        res = []
+        cur = 0  # first offset not yet accounted for by a match or a non-match
+        for match in l:
+            if match.a != match.b:
+                raise ValueError('only works for equal-offset matches')
+            assert match.a >= cur  # holds as long as the input is ordered and non-overlapping
+            nm_len = match.a - cur
+            if nm_len > 0:
+                # there's no point in generating zero-length non-matching sections
+                res.append(cls(a=cur, b=cur, size=nm_len))
+            cur = match.a + match.size
+        if size > cur:  # trailing non-matching data after the last match
+            res.append(cls(a=cur, b=cur, size=size-cur))
+
+        return res
+
 class Naa:
     """A class defining a Network Access Application (NAA)"""
     name = None
@@ -411,12 +436,38 @@
                 return ValueError("Unknown key '%s' in tuple list" % k)
         return stream.getvalue()

-    def file_content_to_tuples(self) -> List[Tuple]:
-        # FIXME: simplistic approach. needs optimization. We should first 
check if the content
-        # matches the expanded default value from the template. If it does, 
return empty list.
-        # Next, we should compute the diff between the default value and 
self.body, and encode
-        # that as a sequence of fillFileOffset and fillFileContent tuples.
-        return [('fillFileContent', self.body)]
+    def file_content_to_tuples(self, optimize:bool = False) -> List[Tuple]:
+        """Encode the file contents into a list of fillFileContent / fillFileOffset tuples that can be fed
+        into the asn.1 encoder.  If optimize is True, only the differences from the template's default
+        value (or from 0xff padding if there is none) are encoded, each fillFileOffset being relative
+        to the end of the preceding fillFileContent.  Otherwise the entire file is encoded as-is."""
+        if not optimize:
+            # simplistic approach: encode the full file, ignoring the template/default
+            return [('fillFileContent', self.body)]
+        # Try to 'compress' the file body, based on the default file contents.
+        if self.template:
+            default = self.template.expand_default_value_pattern(length=len(self.body))
+            if not default:
+                sm = SequenceMatcher(a=b'\xff'*len(self.body), b=self.body)
+            else:
+                if default == self.body:
+                    # 100% match: return an empty tuple list to make eUICC use the default
+                    return []
+                sm = SequenceMatcher(a=default, b=self.body)
+        else:
+            # no template at all: we can only remove padding
+            sm = SequenceMatcher(a=b'\xff'*len(self.body), b=self.body)
+        matching_blocks = sm.get_matching_blocks()
+        # we can only make use of matches that have the same offset in 'a' and 'b'
+        matching_blocks = [x for x in matching_blocks if x.size > 0 and x.a == x.b]
+        non_matching_blocks = NonMatch.from_matchlist(matching_blocks, self.file_size)
+        ret = []
+        cur = 0
+        for block in non_matching_blocks:
+            ret.append(('fillFileOffset', block.a - cur))
+            ret.append(('fillFileContent', self.body[block.a:block.a+block.size]))
+            cur = block.a + block.size  # absolute end of this content; the next offset is relative to it
+        return ret

     def __str__(self) -> str:
         return "File(%s)" % self.pe_name
diff --git a/tests/unittests/test_esim_saip.py 
b/tests/unittests/test_esim_saip.py
index e7e324d..edf6d8d 100755
--- a/tests/unittests/test_esim_saip.py
+++ b/tests/unittests/test_esim_saip.py
@@ -90,5 +90,34 @@
         self.assertTrue(oid.OID('1.0.1') > oid.OID('1.0'))
         self.assertTrue(oid.OID('1.0.2') > oid.OID('1.0.1'))

+class NonMatchTest(unittest.TestCase):
+    def test_nonmatch(self):
+        # non-matches before, in between and after matches
+        match_list = [Match(a=10, b=10, size=5), Match(a=20, b=20, size=4)]
+        nm_list = NonMatch.from_matchlist(match_list, 26)
+        self.assertEqual(nm_list, [NonMatch(a=0, b=0, size=10), NonMatch(a=15, b=15, size=5),
+                                   NonMatch(a=24, b=24, size=2)])
+
+    def test_nonmatch_beg(self):
+        # single match at beginning: everything after it is one non-matching block
+        match_list = [Match(a=0, b=0, size=5)]
+        nm_list = NonMatch.from_matchlist(match_list, 20)
+        self.assertEqual(nm_list, [NonMatch(a=5, b=5, size=15)])
+
+    def test_nonmatch_end(self):
+        # single match ending exactly at the total size: no trailing non-matching block
+        match_list = [Match(a=19, b=19, size=5)]
+        nm_list = NonMatch.from_matchlist(match_list, 24)
+        self.assertEqual(nm_list, [NonMatch(a=0, b=0, size=19)])
+
+    def test_nonmatch_none(self):
+        # no match at all: the whole range is one non-matching block
+        match_list = []
+        nm_list = NonMatch.from_matchlist(match_list, 24)
+        self.assertEqual(nm_list, [NonMatch(a=0, b=0, size=24)])
+
+
+
+
 if __name__ == "__main__":
        unittest.main()

--
To view, visit https://gerrit.osmocom.org/c/pysim/+/38014?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pysim
Gerrit-Branch: master
Gerrit-Change-Id: I61e4a5e04beba5c9092979fc546292d5ef3d7aad
Gerrit-Change-Number: 38014
Gerrit-PatchSet: 7
Gerrit-Owner: laforge <[email protected]>
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: dexter <[email protected]>
Gerrit-Reviewer: laforge <[email protected]>
Gerrit-Reviewer: neels <[email protected]>

[M] Change in pysim[master]: pySim.esim.saip: Implement optimized file content encoding

Reply via email to