On Thu, Feb 06, 2014 at 05:24:12PM +0100, Santi Raffa wrote:
> For inbound data the simplest, safest thing to do is to traverse all
> JSON right after encoding and search for private parameters by key.
> 
> This ensures that all consumers of this data get Private values
> transparently and consistently; the serializing methods don't have to
> worry about trying to understand what kind of JSON it is receiving.
> It also minimizes the surface area of codebase that is exposed to the
> values directly.
> 
> The downside is it adds a ~5% time overhead to all JSON decoding as
> measured through:
> 
> $ time make hs-test-py_compat_types
> 
> As far as encoding to JSON is concerned, the serializer will redact
> Private values unless told to do so explicitly (e.g., for tests).
> 
> Signed-off-by: Santi Raffa <[email protected]>
> ---
>  lib/rpc/client.py                     |   3 +-
>  lib/serializer.py                     |  78 +++++++++++++++++++--
>  src/Ganeti/Constants.hs               |  20 ++++++
>  test/hs/Test/Ganeti/OpCodes.hs        |   5 +-
>  test/py/cmdlib/node_unittest.py       |   1 -
>  test/py/ganeti.serializer_unittest.py | 126 
> +++++++++++++++++++++++++++++-----
>  6 files changed, 206 insertions(+), 27 deletions(-)
> 
> diff --git a/lib/rpc/client.py b/lib/rpc/client.py
> index 83b7df9..5e2fb93 100644
> --- a/lib/rpc/client.py
> +++ b/lib/rpc/client.py
> @@ -120,7 +120,8 @@ def FormatRequest(method, args, version=None):
>      request[KEY_VERSION] = version
>  
>    # Serialize the request
> -  return serializer.DumpJson(request)
> +  return serializer.DumpJson(request,
> +                             
> private_encoder=serializer.EncodeWithPrivateFields)
>  
>  
>  def CallRPCMethod(transport_cb, method, args, version=None):
> diff --git a/lib/serializer.py b/lib/serializer.py
> index 1b3be77..af192be 100644
> --- a/lib/serializer.py
> +++ b/lib/serializer.py
> @@ -40,19 +40,25 @@ import simplejson
>  
>  from ganeti import errors
>  from ganeti import utils
> -
> +from ganeti import constants
>  
>  _RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE)
>  
>  
> -def DumpJson(data):
> +def DumpJson(data, private_encoder=None):
>    """Serialize a given object.
>  
>    @param data: the data to serialize
>    @return: the string representation of data
> +  @param private_encoder: specify L{serializer.EncodeWithPrivateFields} if 
> you
> +                          require the produced JSON to also contain private
> +                          parameters. Otherwise, they will encode to null.
>  
>    """
> -  encoded = simplejson.dumps(data)
> +  if private_encoder is None:
> +    # Do not leak private fields by default.
> +    private_encoder = EncodeWithoutPrivateFields
> +  encoded = simplejson.dumps(data, default=private_encoder)
>  
>    txt = _RE_EOLSP.sub("", encoded)
>    if not txt.endswith("\n"):
> @@ -69,20 +75,65 @@ def LoadJson(txt):
>    @raise JSONDecodeError: if L{txt} is not a valid JSON document
>  
>    """
> -  return simplejson.loads(txt)
> +  values = simplejson.loads(txt)
> +
> +  # Hunt and seek for Private fields and wrap them.
> +  WrapPrivateValues(values)
> +
> +  return values
> +
> +
> +def WrapPrivateValues(json):
> +  """Crawl a JSON decoded structure for private values and wrap them.
> +
> +  @param json: the json-decoded value to protect.
> +
> +  """
> +

Unnecessary newline.

> +  # This function used to be recursive. I use this list to avoid actual
> +  # recursion, however, since this is a very high-traffic area.
> +  todo = [json]
> +
> +  while todo:
> +    data = todo.pop()
> +
> +    if isinstance(data, list): # Array
> +      for item in data:
> +        todo.append(item)
> +    elif isinstance(data, dict): # Object
> +
> +      # This is kind of a kludge, but the only place where we know what 
> should
> +      # be protected is in opcodes.py, and not in a way that is helpful to 
> us,
> +      # especially in such a high traffic method; on the other hand, the
> +      # Haskell `py_compat_fields` test should complain whenever this check
> +      # does not protect fields properly.
> +      for field in data:
> +        value = data[field]
> +        if field in constants.PRIVATE_PARAMETERS_BLACKLIST:
> +          if not field.endswith("_cluster"):
> +            data[field] = PrivateDict(value)
> +          else:
> +            for os in data[field]:
> +              value[os] = PrivateDict(value[os])
> +        else:
> +          todo.append(value)
> +    else: # Values
> +      pass
>  
>  
> -def DumpSignedJson(data, key, salt=None, key_selector=None):
> +def DumpSignedJson(data, key, salt=None, key_selector=None,
> +                   private_encoder=None):
>    """Serialize a given object and authenticate it.
>  
>    @param data: the data to serialize
>    @param key: shared hmac key
>    @param key_selector: name/id that identifies the key (in case there are
>      multiple keys in use, e.g. in a multi-cluster environment)
> +  @param private_encoder: see L{DumpJson}
>    @return: the string representation of data signed by the hmac key
>  
>    """
> -  txt = DumpJson(data)
> +  txt = DumpJson(data, private_encoder=private_encoder)
>    if salt is None:
>      salt = ""
>    signed_dict = {
> @@ -113,6 +164,9 @@ def LoadSignedJson(txt, key):
>  
>    """
>    signed_dict = LoadJson(txt)
> +
> +  WrapPrivateValues(signed_dict)
> +
>    if not isinstance(signed_dict, dict):
>      raise errors.SignatureError("Invalid external message")
>    try:
> @@ -321,3 +375,15 @@ class PrivateDict(dict):
>      for key in self:
>        returndict[key] = self[key].Get()
>      return returndict
> +
> +
> +def EncodeWithoutPrivateFields(obj):
> +  if isinstance(obj, Private):
> +    return None
> +  raise TypeError(repr(obj) + " is not JSON serializable")
> +
> +
> +def EncodeWithPrivateFields(obj):
> +  if isinstance(obj, Private):
> +    return obj.Get()
> +  raise TypeError(repr(obj) + " is not JSON serializable")
> diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
> index 9e008fe..7595283 100644
> --- a/src/Ganeti/Constants.hs
> +++ b/src/Ganeti/Constants.hs
> @@ -4773,3 +4773,23 @@ instanceCommunicationNetwork = 
> "ganeti:network:communication"
>  
>  instanceCommunicationNicPrefix :: String
>  instanceCommunicationNicPrefix = "ganeti:communication:"
> +
> +-- | Parameters that should be protected
> +--
> +-- Python does not have a type system and can't automatically infer what 
> should
> +-- be the resulting type of a JSON request. As a result, it must rely on this
> +-- list of parameter names to protect values correctly.
> +--
> +-- Names ending in _cluster will be treated as dicts of dicts of private 
> values.
> +-- Otherwise they are considered dicts of private values.
> +privateParametersBlacklist :: [String]
> +privateParametersBlacklist = [ "osparams_private"
> +                             , "osparams_secret"
> +                             , "osparams_private_cluster"
> +                             ]
> +
> +-- | Warn the user that the logging level is too low for production use.
> +debugModeConfidentialityWarning :: String
> +debugModeConfidentialityWarning =
> +  "ALERT: %s started in debug mode.\n\
> +  \ Private and secret parameters WILL be logged!\n"
> diff --git a/test/hs/Test/Ganeti/OpCodes.hs b/test/hs/Test/Ganeti/OpCodes.hs
> index 4a502f1..c9be067 100644
> --- a/test/hs/Test/Ganeti/OpCodes.hs
> +++ b/test/hs/Test/Ganeti/OpCodes.hs
> @@ -519,7 +519,10 @@ case_py_compat_types = do
>                 \  op.Validate(True)\n\
>                 \encoded = [(op.Summary(), op.__getstate__())\n\
>                 \           for op in decoded]\n\
> -               \print serializer.Dump(encoded)" serialized
> +               \print serializer.Dump(\
> +               \  encoded,\
> +               \  private_encoder=serializer.EncodeWithPrivateFields)"
> +               serialized
>       >>= checkPythonResult
>    let deserialised =
>          J.decode py_stdout::J.Result [(String, OpCodes.MetaOpCode)]
> diff --git a/test/py/cmdlib/node_unittest.py b/test/py/cmdlib/node_unittest.py
> index d0ab416..8c5581f 100644
> --- a/test/py/cmdlib/node_unittest.py
> +++ b/test/py/cmdlib/node_unittest.py
> @@ -229,7 +229,6 @@ class TestLUNodeAdd(CmdlibTestCase):
>      self.ExecOpCodeExpectOpPrereqError(op, "Readded node doesn't have the 
> same"
>                                         " IP address configuration as before")
>  
> -
>    def testNodeHasSecondaryIpButNotMaster(self):
>      self.master.secondary_ip = self.master.primary_ip
>  
> diff --git a/test/py/ganeti.serializer_unittest.py 
> b/test/py/ganeti.serializer_unittest.py
> index 45a3932..8894c94 100755
> --- a/test/py/ganeti.serializer_unittest.py
> +++ b/test/py/ganeti.serializer_unittest.py
> @@ -23,10 +23,12 @@
>  
>  
>  import unittest
> +import doctest

Keep alphabetical s

>  from ganeti import serializer
>  from ganeti import errors
>  from ganeti import ht
> +from ganeti import objects

Fix alphabetical order.

Rest LGTM. No need to resend.

Thanks,
Jose

>  import testutils
>  
> @@ -39,23 +41,28 @@ class TestSerializer(testutils.GanetiTestCase):
>      255,
>      [1, 2, 3],
>      (1, 2, 3),
> -    { "1": 2, "foo": "bar", },
> +    {"1": 2,
> +     "foo": "bar"},
>      ["abc", 1, 2, 3, 999,
>        {
>          "a1": ("Hello", "World"),
>          "a2": "This is only a test",
>          "a3": None,
> -        },
> -      {
> -        "foo": "bar",
> -        },
> -      ]
> +        "osparams:": serializer.PrivateDict({
> +                      "foo": 5,
> +                     })
> +      }
>      ]
> +  ]
>  
>    def _TestSerializer(self, dump_fn, load_fn):
> +    _dump_fn = lambda data: dump_fn(
> +      data,
> +      private_encoder=serializer.EncodeWithPrivateFields
> +    )
>      for data in self._TESTDATA:
> -      self.failUnless(dump_fn(data).endswith("\n"))
> -      self.assertEqualValues(load_fn(dump_fn(data)), data)
> +      self.failUnless(_dump_fn(data).endswith("\n"))
> +      self.assertEqualValues(load_fn(_dump_fn(data)), data)
>  
>    def testGeneric(self):
>      self._TestSerializer(serializer.Dump, serializer.Load)
> @@ -70,30 +77,35 @@ class TestSerializer(testutils.GanetiTestCase):
>      self._TestSigned(serializer.DumpSignedJson, serializer.LoadSignedJson)
>  
>    def _TestSigned(self, dump_fn, load_fn):
> +    _dump_fn = lambda *args, **kwargs: dump_fn(
> +      *args,
> +      private_encoder=serializer.EncodeWithPrivateFields,
> +      **kwargs
> +    )
>      for data in self._TESTDATA:
> -      self.assertEqualValues(load_fn(dump_fn(data, "mykey"), "mykey"),
> +      self.assertEqualValues(load_fn(_dump_fn(data, "mykey"), "mykey"),
>                               (data, ""))
> -      self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey",
> -                                             salt="mysalt"),
> +      self.assertEqualValues(load_fn(_dump_fn(data, "myprivatekey",
> +                                              salt="mysalt"),
>                                       "myprivatekey"),
>                               (data, "mysalt"))
>  
>        keydict = {
>          "mykey_id": "myprivatekey",
>          }
> -      self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey",
> -                                             salt="mysalt",
> -                                             key_selector="mykey_id"),
> +      self.assertEqualValues(load_fn(_dump_fn(data, "myprivatekey",
> +                                              salt="mysalt",
> +                                              key_selector="mykey_id"),
>                                       keydict.get),
>                               (data, "mysalt"))
>        self.assertRaises(errors.SignatureError, load_fn,
> -                        dump_fn(data, "myprivatekey",
> -                                salt="mysalt",
> -                                key_selector="mykey_id"),
> +                        _dump_fn(data, "myprivatekey",
> +                                 salt="mysalt",
> +                                 key_selector="mykey_id"),
>                          {}.get)
>  
>      self.assertRaises(errors.SignatureError, load_fn,
> -                      dump_fn("test", "myprivatekey"),
> +                      _dump_fn("test", "myprivatekey"),
>                        "myotherkey")
>  
>      self.assertRaises(errors.SignatureError, load_fn,
> @@ -131,5 +143,83 @@ class TestLoadAndVerifyJson(unittest.TestCase):
>      self.assertEqual(serializer.LoadAndVerifyJson("\"Foo\"", ht.TAny), "Foo")
>  
>  
> +class TestPrivate(unittest.TestCase):
> +
> +  def testEquality(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    nDict = {"bar": "egg"}
> +    self.assertEqual(pDict, nDict, "PrivateDict-dict equality failure")
> +
> +  def testPrivateDictUnprivate(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    uDict = pDict.Unprivate()
> +    nDict = {"bar": "egg"}
> +    self.assertEquals(type(uDict), dict,
> +                      "PrivateDict.Unprivate() did not return a dict")
> +    self.assertEqual(pDict, uDict, "PrivateDict.Unprivate() equality 
> failure")
> +    self.assertEqual(nDict, uDict, "PrivateDict.Unprivate() failed to 
> return")
> +
> +  def testAttributeTransparency(self):
> +    class Dummy(object):
> +      pass
> +
> +    dummy = Dummy()
> +    dummy.bar = "egg"
> +    pDummy = serializer.Private(dummy)
> +    self.assertEqual(pDummy.bar, "egg", "Failed to access attribute of 
> Private")
> +
> +  def testCallTransparency(self):
> +    foo = serializer.Private("egg")
> +    self.assertEqual(foo.upper(), "EGG", "Failed to call Private instance")
> +
> +  def testFillDict(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    self.assertEqual(pDict, objects.FillDict({}, pDict))
> +
> +  def testLeak(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    self.assertNotIn("egg", str(pDict), "Value leaked in str(PrivateDict)")
> +    self.assertNotIn("egg", repr(pDict), "Value leaked in repr(PrivateDict)")
> +    self.assertNotIn("egg", "{0}".format(pDict),
> +                     "Value leaked in PrivateDict.__format__")
> +    self.assertNotIn("egg", serializer.Dump(pDict),
> +                     "Value leaked in serializer.Dump(PrivateDict)")
> +
> +  def testProperAccess(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +
> +    self.assertIs("egg", pDict["bar"].Get(),
> +                  "Value not returned by Private.Get()")
> +    self.assertIs("egg", pDict.GetPrivate("bar"),
> +                  "Value not returned by Private.GetPrivate()")
> +    self.assertIs("egg", pDict.Unprivate()["bar"],
> +                  "Value not returned by PrivateDict.Unprivate()")
> +    self.assertIn(
> +      "egg",
> +      serializer.Dump(pDict,
> +                      private_encoder=serializer.EncodeWithPrivateFields)
> +    )
> +
> +  def testDictGet(self):
> +    self.assertIs("tar", serializer.PrivateDict().GetPrivate("bar", "tar"),
> +                  "Private.GetPrivate() did not handle the default case")
> +
> +  def testZeronessPrivate(self):
> +    self.assertTrue(serializer.Private("foo"),
> +                    "Private of non-empty string is false")
> +    self.assertFalse(serializer.Private(""), "Private empty string is true")
> +
> +
> +class TestCheckDoctests(unittest.TestCase):
> +
> +  def testCheckSerializer(self):
> +    results = doctest.testmod(serializer)
> +    self.assertEquals(results.failed, 0, "Doctest failures detected")
> +
>  if __name__ == "__main__":
>    testutils.GanetiTestProgram()
> -- 
> 1.9.0.rc1.175.g0b1dcb5
> 

-- 
Jose Antonio Lopes
Ganeti Engineering
Google Germany GmbH
Dienerstr. 12, 80331, München

Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
Geschäftsführer: Graham Law, Christine Elizabeth Flores
Steuernummer: 48/725/00206
Umsatzsteueridentifikationsnummer: DE813741370

Reply via email to