On Thu, Feb 06, 2014 at 05:24:12PM +0100, Santi Raffa wrote:
> For inbound data the simplest, safest thing to do is to traverse all
> JSON right after encoding and search for private parameters by key.
>
> This ensures that all consumers of this data get Private values
> transparently and consistently; the serializing methods don't have to
> worry about trying to understand what kind of JSON it is receiving.
> It also minimizes the surface area of codebase that is exposed to the
> values directly.
>
> The downside is it adds a ~5% time overhead to all JSON decoding as
> measured through:
>
> $ time make hs-test-py_compat_types
>
> As far as encoding to JSON is concerned, the serializer will redact
> Private values unless told to do so explicitly (e.g., for tests).
>
> Signed-off-by: Santi Raffa <[email protected]>
> ---
> lib/rpc/client.py | 3 +-
> lib/serializer.py | 78 +++++++++++++++++++--
> src/Ganeti/Constants.hs | 20 ++++++
> test/hs/Test/Ganeti/OpCodes.hs | 5 +-
> test/py/cmdlib/node_unittest.py | 1 -
> test/py/ganeti.serializer_unittest.py | 126
> +++++++++++++++++++++++++++++-----
> 6 files changed, 206 insertions(+), 27 deletions(-)
>
> diff --git a/lib/rpc/client.py b/lib/rpc/client.py
> index 83b7df9..5e2fb93 100644
> --- a/lib/rpc/client.py
> +++ b/lib/rpc/client.py
> @@ -120,7 +120,8 @@ def FormatRequest(method, args, version=None):
> request[KEY_VERSION] = version
>
> # Serialize the request
> - return serializer.DumpJson(request)
> + return serializer.DumpJson(request,
> +
> private_encoder=serializer.EncodeWithPrivateFields)
>
>
> def CallRPCMethod(transport_cb, method, args, version=None):
> diff --git a/lib/serializer.py b/lib/serializer.py
> index 1b3be77..af192be 100644
> --- a/lib/serializer.py
> +++ b/lib/serializer.py
> @@ -40,19 +40,25 @@ import simplejson
>
> from ganeti import errors
> from ganeti import utils
> -
> +from ganeti import constants
>
> _RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE)
>
>
> -def DumpJson(data):
> +def DumpJson(data, private_encoder=None):
> """Serialize a given object.
>
> @param data: the data to serialize
> @return: the string representation of data
> + @param private_encoder: specify L{serializer.EncodeWithPrivateFields} if
> you
> + require the produced JSON to also contain private
> + parameters. Otherwise, they will encode to null.
>
> """
> - encoded = simplejson.dumps(data)
> + if private_encoder is None:
> + # Do not leak private fields by default.
> + private_encoder = EncodeWithoutPrivateFields
> + encoded = simplejson.dumps(data, default=private_encoder)
>
> txt = _RE_EOLSP.sub("", encoded)
> if not txt.endswith("\n"):
> @@ -69,20 +75,65 @@ def LoadJson(txt):
> @raise JSONDecodeError: if L{txt} is not a valid JSON document
>
> """
> - return simplejson.loads(txt)
> + values = simplejson.loads(txt)
> +
> + # Hunt and seek for Private fields and wrap them.
> + WrapPrivateValues(values)
> +
> + return values
> +
> +
> +def WrapPrivateValues(json):
> + """Crawl a JSON decoded structure for private values and wrap them.
> +
> + @param json: the json-decoded value to protect.
> +
> + """
> +
Unnecessary newline.
> + # This function used to be recursive. I use this list to avoid actual
> + # recursion, however, since this is a very high-traffic area.
> + todo = [json]
> +
> + while todo:
> + data = todo.pop()
> +
> + if isinstance(data, list): # Array
> + for item in data:
> + todo.append(item)
> + elif isinstance(data, dict): # Object
> +
> + # This is kind of a kludge, but the only place where we know what
> should
> + # be protected is in opcodes.py, and not in a way that is helpful to
> us,
> + # especially in such a high traffic method; on the other hand, the
> + # Haskell `py_compat_fields` test should complain whenever this check
> + # does not protect fields properly.
> + for field in data:
> + value = data[field]
> + if field in constants.PRIVATE_PARAMETERS_BLACKLIST:
> + if not field.endswith("_cluster"):
> + data[field] = PrivateDict(value)
> + else:
> + for os in data[field]:
> + value[os] = PrivateDict(value[os])
> + else:
> + todo.append(value)
> + else: # Values
> + pass
>
>
> -def DumpSignedJson(data, key, salt=None, key_selector=None):
> +def DumpSignedJson(data, key, salt=None, key_selector=None,
> + private_encoder=None):
> """Serialize a given object and authenticate it.
>
> @param data: the data to serialize
> @param key: shared hmac key
> @param key_selector: name/id that identifies the key (in case there are
> multiple keys in use, e.g. in a multi-cluster environment)
> + @param private_encoder: see L{DumpJson}
> @return: the string representation of data signed by the hmac key
>
> """
> - txt = DumpJson(data)
> + txt = DumpJson(data, private_encoder=private_encoder)
> if salt is None:
> salt = ""
> signed_dict = {
> @@ -113,6 +164,9 @@ def LoadSignedJson(txt, key):
>
> """
> signed_dict = LoadJson(txt)
> +
> + WrapPrivateValues(signed_dict)
> +
> if not isinstance(signed_dict, dict):
> raise errors.SignatureError("Invalid external message")
> try:
> @@ -321,3 +375,15 @@ class PrivateDict(dict):
> for key in self:
> returndict[key] = self[key].Get()
> return returndict
> +
> +
> +def EncodeWithoutPrivateFields(obj):
> + if isinstance(obj, Private):
> + return None
> + raise TypeError(repr(obj) + " is not JSON serializable")
> +
> +
> +def EncodeWithPrivateFields(obj):
> + if isinstance(obj, Private):
> + return obj.Get()
> + raise TypeError(repr(obj) + " is not JSON serializable")
> diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
> index 9e008fe..7595283 100644
> --- a/src/Ganeti/Constants.hs
> +++ b/src/Ganeti/Constants.hs
> @@ -4773,3 +4773,23 @@ instanceCommunicationNetwork =
> "ganeti:network:communication"
>
> instanceCommunicationNicPrefix :: String
> instanceCommunicationNicPrefix = "ganeti:communication:"
> +
> +-- | Parameters that should be protected
> +--
> +-- Python does not have a type system and can't automatically infer what
> should
> +-- be the resulting type of a JSON request. As a result, it must rely on this
> +-- list of parameter names to protect values correctly.
> +--
> +-- Names ending in _cluster will be treated as dicts of dicts of private
> values.
> +-- Otherwise they are considered dicts of private values.
> +privateParametersBlacklist :: [String]
> +privateParametersBlacklist = [ "osparams_private"
> + , "osparams_secret"
> + , "osparams_private_cluster"
> + ]
> +
> +-- | Warn the user that the logging level is too low for production use.
> +debugModeConfidentialityWarning :: String
> +debugModeConfidentialityWarning =
> + "ALERT: %s started in debug mode.\n\
> + \ Private and secret parameters WILL be logged!\n"
> diff --git a/test/hs/Test/Ganeti/OpCodes.hs b/test/hs/Test/Ganeti/OpCodes.hs
> index 4a502f1..c9be067 100644
> --- a/test/hs/Test/Ganeti/OpCodes.hs
> +++ b/test/hs/Test/Ganeti/OpCodes.hs
> @@ -519,7 +519,10 @@ case_py_compat_types = do
> \ op.Validate(True)\n\
> \encoded = [(op.Summary(), op.__getstate__())\n\
> \ for op in decoded]\n\
> - \print serializer.Dump(encoded)" serialized
> + \print serializer.Dump(\
> + \ encoded,\
> + \ private_encoder=serializer.EncodeWithPrivateFields)"
> + serialized
> >>= checkPythonResult
> let deserialised =
> J.decode py_stdout::J.Result [(String, OpCodes.MetaOpCode)]
> diff --git a/test/py/cmdlib/node_unittest.py b/test/py/cmdlib/node_unittest.py
> index d0ab416..8c5581f 100644
> --- a/test/py/cmdlib/node_unittest.py
> +++ b/test/py/cmdlib/node_unittest.py
> @@ -229,7 +229,6 @@ class TestLUNodeAdd(CmdlibTestCase):
> self.ExecOpCodeExpectOpPrereqError(op, "Readded node doesn't have the
> same"
> " IP address configuration as before")
>
> -
> def testNodeHasSecondaryIpButNotMaster(self):
> self.master.secondary_ip = self.master.primary_ip
>
> diff --git a/test/py/ganeti.serializer_unittest.py
> b/test/py/ganeti.serializer_unittest.py
> index 45a3932..8894c94 100755
> --- a/test/py/ganeti.serializer_unittest.py
> +++ b/test/py/ganeti.serializer_unittest.py
> @@ -23,10 +23,12 @@
>
>
> import unittest
> +import doctest
Keep alphabetical s
> from ganeti import serializer
> from ganeti import errors
> from ganeti import ht
> +from ganeti import objects
Fix alphabetical order.
Rest LGTM. No need to resend.
Thanks,
Jose
> import testutils
>
> @@ -39,23 +41,28 @@ class TestSerializer(testutils.GanetiTestCase):
> 255,
> [1, 2, 3],
> (1, 2, 3),
> - { "1": 2, "foo": "bar", },
> + {"1": 2,
> + "foo": "bar"},
> ["abc", 1, 2, 3, 999,
> {
> "a1": ("Hello", "World"),
> "a2": "This is only a test",
> "a3": None,
> - },
> - {
> - "foo": "bar",
> - },
> - ]
> + "osparams:": serializer.PrivateDict({
> + "foo": 5,
> + })
> + }
> ]
> + ]
>
> def _TestSerializer(self, dump_fn, load_fn):
> + _dump_fn = lambda data: dump_fn(
> + data,
> + private_encoder=serializer.EncodeWithPrivateFields
> + )
> for data in self._TESTDATA:
> - self.failUnless(dump_fn(data).endswith("\n"))
> - self.assertEqualValues(load_fn(dump_fn(data)), data)
> + self.failUnless(_dump_fn(data).endswith("\n"))
> + self.assertEqualValues(load_fn(_dump_fn(data)), data)
>
> def testGeneric(self):
> self._TestSerializer(serializer.Dump, serializer.Load)
> @@ -70,30 +77,35 @@ class TestSerializer(testutils.GanetiTestCase):
> self._TestSigned(serializer.DumpSignedJson, serializer.LoadSignedJson)
>
> def _TestSigned(self, dump_fn, load_fn):
> + _dump_fn = lambda *args, **kwargs: dump_fn(
> + *args,
> + private_encoder=serializer.EncodeWithPrivateFields,
> + **kwargs
> + )
> for data in self._TESTDATA:
> - self.assertEqualValues(load_fn(dump_fn(data, "mykey"), "mykey"),
> + self.assertEqualValues(load_fn(_dump_fn(data, "mykey"), "mykey"),
> (data, ""))
> - self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey",
> - salt="mysalt"),
> + self.assertEqualValues(load_fn(_dump_fn(data, "myprivatekey",
> + salt="mysalt"),
> "myprivatekey"),
> (data, "mysalt"))
>
> keydict = {
> "mykey_id": "myprivatekey",
> }
> - self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey",
> - salt="mysalt",
> - key_selector="mykey_id"),
> + self.assertEqualValues(load_fn(_dump_fn(data, "myprivatekey",
> + salt="mysalt",
> + key_selector="mykey_id"),
> keydict.get),
> (data, "mysalt"))
> self.assertRaises(errors.SignatureError, load_fn,
> - dump_fn(data, "myprivatekey",
> - salt="mysalt",
> - key_selector="mykey_id"),
> + _dump_fn(data, "myprivatekey",
> + salt="mysalt",
> + key_selector="mykey_id"),
> {}.get)
>
> self.assertRaises(errors.SignatureError, load_fn,
> - dump_fn("test", "myprivatekey"),
> + _dump_fn("test", "myprivatekey"),
> "myotherkey")
>
> self.assertRaises(errors.SignatureError, load_fn,
> @@ -131,5 +143,83 @@ class TestLoadAndVerifyJson(unittest.TestCase):
> self.assertEqual(serializer.LoadAndVerifyJson("\"Foo\"", ht.TAny), "Foo")
>
>
> +class TestPrivate(unittest.TestCase):
> +
> + def testEquality(self):
> + pDict = serializer.PrivateDict()
> + pDict["bar"] = "egg"
> + nDict = {"bar": "egg"}
> + self.assertEqual(pDict, nDict, "PrivateDict-dict equality failure")
> +
> + def testPrivateDictUnprivate(self):
> + pDict = serializer.PrivateDict()
> + pDict["bar"] = "egg"
> + uDict = pDict.Unprivate()
> + nDict = {"bar": "egg"}
> + self.assertEquals(type(uDict), dict,
> + "PrivateDict.Unprivate() did not return a dict")
> + self.assertEqual(pDict, uDict, "PrivateDict.Unprivate() equality
> failure")
> + self.assertEqual(nDict, uDict, "PrivateDict.Unprivate() failed to
> return")
> +
> + def testAttributeTransparency(self):
> + class Dummy(object):
> + pass
> +
> + dummy = Dummy()
> + dummy.bar = "egg"
> + pDummy = serializer.Private(dummy)
> + self.assertEqual(pDummy.bar, "egg", "Failed to access attribute of
> Private")
> +
> + def testCallTransparency(self):
> + foo = serializer.Private("egg")
> + self.assertEqual(foo.upper(), "EGG", "Failed to call Private instance")
> +
> + def testFillDict(self):
> + pDict = serializer.PrivateDict()
> + pDict["bar"] = "egg"
> + self.assertEqual(pDict, objects.FillDict({}, pDict))
> +
> + def testLeak(self):
> + pDict = serializer.PrivateDict()
> + pDict["bar"] = "egg"
> + self.assertNotIn("egg", str(pDict), "Value leaked in str(PrivateDict)")
> + self.assertNotIn("egg", repr(pDict), "Value leaked in repr(PrivateDict)")
> + self.assertNotIn("egg", "{0}".format(pDict),
> + "Value leaked in PrivateDict.__format__")
> + self.assertNotIn("egg", serializer.Dump(pDict),
> + "Value leaked in serializer.Dump(PrivateDict)")
> +
> + def testProperAccess(self):
> + pDict = serializer.PrivateDict()
> + pDict["bar"] = "egg"
> +
> + self.assertIs("egg", pDict["bar"].Get(),
> + "Value not returned by Private.Get()")
> + self.assertIs("egg", pDict.GetPrivate("bar"),
> + "Value not returned by Private.GetPrivate()")
> + self.assertIs("egg", pDict.Unprivate()["bar"],
> + "Value not returned by PrivateDict.Unprivate()")
> + self.assertIn(
> + "egg",
> + serializer.Dump(pDict,
> + private_encoder=serializer.EncodeWithPrivateFields)
> + )
> +
> + def testDictGet(self):
> + self.assertIs("tar", serializer.PrivateDict().GetPrivate("bar", "tar"),
> + "Private.GetPrivate() did not handle the default case")
> +
> + def testZeronessPrivate(self):
> + self.assertTrue(serializer.Private("foo"),
> + "Private of non-empty string is false")
> + self.assertFalse(serializer.Private(""), "Private empty string is true")
> +
> +
> +class TestCheckDoctests(unittest.TestCase):
> +
> + def testCheckSerializer(self):
> + results = doctest.testmod(serializer)
> + self.assertEquals(results.failed, 0, "Doctest failures detected")
> +
> if __name__ == "__main__":
> testutils.GanetiTestProgram()
> --
> 1.9.0.rc1.175.g0b1dcb5
>
--
Jose Antonio Lopes
Ganeti Engineering
Google Germany GmbH
Dienerstr. 12, 80331, München
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
Geschäftsführer: Graham Law, Christine Elizabeth Flores
Steuernummer: 48/725/00206
Umsatzsteueridentifikationsnummer: DE813741370