Author: mahadev
Date: Wed Dec 16 23:27:38 2009
New Revision: 891474

URL: http://svn.apache.org/viewvc?rev=891474&view=rev
Log:
ZOOKEEPER-627. zkpython arbitrarily restricts the size of a 'get' to 512 
bytes (henry robinson via mahadev)

Modified:
    hadoop/zookeeper/trunk/CHANGES.txt
    hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/pyzk_docstrings.h
    hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/zookeeper.c
    hadoop/zookeeper/trunk/src/contrib/zkpython/src/test/get_set_test.py

Modified: hadoop/zookeeper/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/CHANGES.txt?rev=891474&r1=891473&r2=891474&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/CHANGES.txt (original)
+++ hadoop/zookeeper/trunk/CHANGES.txt Wed Dec 16 23:27:38 2009
@@ -176,6 +176,9 @@
   ZOOKEEPER-630. Trunk has duplicate ObserverTest.java files
   (henry robinson via phunt)
 
+  ZOOKEEPER-627. zkpython arbitrarily restricts the size of a 'get' to 512
+  bytes (henry robinson via mahadev)
+
 IMPROVEMENTS:
   ZOOKEEPER-473. cleanup junit tests to eliminate false positives due to
   "socket reuse" and failure to close client (phunt via mahadev)

Modified: hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/pyzk_docstrings.h
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/pyzk_docstrings.h?rev=891474&r1=891473&r2=891474&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/pyzk_docstrings.h (original)
+++ hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/pyzk_docstrings.h Wed Dec 16 23:27:38 2009
@@ -579,7 +579,9 @@
 "\n"
 "(subsequent parameters are optional)\n"
 " watcher if not None, a watch will be set at the server to notify \n"
-"the client if the node changes.\n"
+" the client if the node changes.\n"
+" bufferlen: This value defaults to 1024*1024 - 1Mb. This method returns \n"
+" the minimum of bufferlen and the true length of the znode's data. \n"
 "RETURNS:\n"
 " the data associated with the node\n"
 "OK operation completed succesfully\n"

Modified: hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/zookeeper.c
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/zookeeper.c?rev=891474&r1=891473&r2=891474&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/zookeeper.c (original)
+++ hadoop/zookeeper/trunk/src/contrib/zkpython/src/c/zookeeper.c Wed Dec 16 23:27:38 2009
@@ -848,33 +848,49 @@
   return build_stat(stat);
 }
 
+// As per ZK documentation, datanodes are limited
+// to 1Mb. Why not do a stat followed by a get, to 
+// determine how big the buffer should be? Because the znode
+// may get updated between calls, so we can't guarantee a 
+// complete get anyhow. 
+#define GET_BUFFER_SIZE 1024*1024
+
+// pyzoo_get has an extra parameter over the java/C equivalents.
+// If you set the fourth integer parameter buffer_len, we return 
+// min(buffer_len, datalength) bytes. This is set by default to 
+// GET_BUFFER_SIZE
 static PyObject *pyzoo_get(PyObject *self, PyObject *args)
 {
   int zkhid;
   char *path;
-  char buffer[512];
-  memset(buffer,0,sizeof(char)*512);
-  int buffer_len=512;
+  char *buffer; 
+  int buffer_len=GET_BUFFER_SIZE;
   struct Stat stat;
   PyObject *watcherfn = Py_None;
   pywatcher_t *pw = NULL;
-  if (!PyArg_ParseTuple(args, "is|O", &zkhid, &path, &watcherfn))
+  if (!PyArg_ParseTuple(args, "is|Oi", &zkhid, &path, &watcherfn, &buffer_len))
     return NULL;
   CHECK_ZHANDLE(zkhid);
-  if (watcherfn != Py_None)
-    pw = create_pywatcher( zkhid, watcherfn,0 );
+  if (watcherfn != Py_None) 
+               {
+                       pw = create_pywatcher( zkhid, watcherfn,0 );
+               }
+       buffer = malloc(sizeof(char)*buffer_len);
   int err = zoo_wget(zhandles[zkhid], path, 
                     watcherfn != Py_None ? watcher_dispatch : NULL, 
                     pw, buffer, 
                     &buffer_len, &stat);
+       
   PyObject *stat_dict = build_stat( &stat );
+
   if (err != ZOK)
-    {
+               {
       PyErr_SetString(err_to_exception(err), zerror(err));
       return NULL;
     }
-
-  return Py_BuildValue( "(s#,N)", buffer,buffer_len, stat_dict );
+       PyObject *ret = Py_BuildValue( "(s#,N)", buffer,buffer_len, stat_dict );
+       free(buffer);
+  return ret;
 }
 
 PyObject *pyzoo_get_acl(PyObject *self, PyObject *args)

Modified: hadoop/zookeeper/trunk/src/contrib/zkpython/src/test/get_set_test.py
URL: 
http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/contrib/zkpython/src/test/get_set_test.py?rev=891474&r1=891473&r2=891474&view=diff
==============================================================================
--- hadoop/zookeeper/trunk/src/contrib/zkpython/src/test/get_set_test.py (original)
+++ hadoop/zookeeper/trunk/src/contrib/zkpython/src/test/get_set_test.py Wed Dec 16 23:27:38 2009
@@ -71,6 +71,25 @@
         self.assertEqual(self.stat, None, "Stat should be none!")
         self.assertEqual(self.value, None, "Value should be none!")
 
+    def test_sync_get_large_datanode(self):
+        """
+        Test that we can retrieve datanode sizes up to
+        1Mb with default parameters (depends on ZooKeeper server).
+        """
+
+        data = ''.join(["A" for x in xrange(1024*1023)])
+        self.ensureDeleted("/zk-python-test-large-datanode")
+        zookeeper.create(self.handle, "/zk-python-test-large-datanode", data,
+                         [{"perms":0x1f, "scheme":"world", "id" :"anyone"}])
+        (ret,stat) = zookeeper.get(self.handle, 
"/zk-python-test-large-datanode")
+        self.assertEqual(len(ret), 1024*1023,
+                         "Should have got 1Mb returned, instead got %s" % 
len(ret))
+        (ret,stat) = zookeeper.get(self.handle, 
"/zk-python-test-large-datanode",None,500)
+        self.assertEqual(len(ret), 500,
+                         "Should have got 500 bytes returned, instead got %s" 
% len(ret))
+
+
+
     def test_async_getset(self):
         self.cv = threading.Condition()
         def get_callback(handle, rc, value, stat):


Reply via email to