felipecrv commented on code in PR #33:
URL: https://github.com/apache/arrow-experiments/pull/33#discussion_r1736271874


##########
http/get_multipart/python/server/server.py:
##########
@@ -0,0 +1,338 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from random import choice, randint
+from http.server import BaseHTTPRequestHandler, HTTPServer
+import io
+import json
+import secrets
+import string
+import time
+
+import pyarrow as pa
+
+# configuration: use chunked transfer encoding for HTTP/1.1 responses?
+CHUNKED_ENCODING = True
+
+
+def random_string(alphabet, length):
+    return "".join(choice(alphabet) for _ in range(length))
+
+
+def random_name(initial):
+    length = randint(3, 7)
+    return initial + random_string(string.ascii_lowercase, length)
+
+
+def example_tickers(num_tickers):
+    tickers = []
+    while len(tickers) < num_tickers:
+        length = randint(3, 4)
+        random_ticker = random_string(string.ascii_uppercase, length)
+        if random_ticker not in tickers:
+            tickers.append(random_ticker)
+    return tickers
+
+
+def example_json_data(tickers):
+    json_data = []
+    for ticker in tickers:
+        description = ""
+        for c in ticker:
+            description = " ".join(random_name(c) for c in ticker)
+        json_data.append(
+            {
+                "ticker": ticker,
+                "description": description,
+            }
+        )
+    return json_data
+
+
+the_schema = pa.schema(
+    [
+        ("ticker", pa.utf8()),
+        ("price", pa.int64()),
+        ("volume", pa.int64()),
+    ]
+)
+
+
+def example_batch(tickers, length):
+    data = {"ticker": [], "price": [], "volume": []}
+    for _ in range(length):
+        data["ticker"].append(choice(tickers))
+        data["price"].append(randint(1, 1000) * 100)
+        data["volume"].append(randint(1, 10000))
+
+    return pa.RecordBatch.from_pydict(data, the_schema)
+
+
+def example_batches(tickers):
+    # these parameters are chosen to generate a response
+    # of ~1 GB and chunks of ~140 KB.
+    total_records = 42_000_000
+    batch_len = 6 * 1024
+    # all the batches sent are random slices of the larger base batch
+    base_batch = example_batch(tickers, length=8 * batch_len)
+    batches = []
+    records = 0
+    while records < total_records:
+        length = min(batch_len, total_records - records)
+        offset = randint(0, base_batch.num_rows - length - 1)
+        batch = base_batch.slice(offset, length)
+        batches.append(batch)
+        records += length
+    return batches
+
+
+# end of example data generation
+
+
+def random_multipart_boundary():
+    """
+    Generate a random boundary string for a multipart response.
+
+    Uses a cryptographically secure random number generator to generate a
+    random boundary string for a multipart response. The boundary string has
+    enough entropy to make it impossible that it will be repeated in the
+    response body.
+
+    Use a new boundary string for each multipart response so that once the
+    secret is revealed to the client, it won't be possible to exploit it to
+    create a malicious response.
+    """
+    # 28 bytes (224 bits) of entropy is enough to make a collision impossible.
+    # See [1] for a mathematical discussion.
+    #
+    # The 28 bytes are encoded into URL-safe characters so the string ends

Review Comment:
   It's alphanumeric, -, and _. I'm expanding the comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to