Copilot commented on code in PR #300:
URL: https://github.com/apache/fluss-rust/pull/300#discussion_r2791083129
##########
website/sidebars.ts:
##########
@@ -0,0 +1,24 @@
+import type {SidebarsConfig} from '@docusaurus/plugin-content-docs';
+
+const sidebars: SidebarsConfig = {
Review Comment:
This sidebar config is missing the standard ASF license header comment at
the top. The repository enforces license headers via CI; please add the header
to this file.
##########
website/docs/index.md:
##########
@@ -0,0 +1,33 @@
+---
+slug: /
+sidebar_position: 1
+title: Introduction
+---
+
Review Comment:
This documentation page is missing the ASF license header (the repo’s other
Markdown docs start with the ASF header as an HTML comment, and CI checks
headers). Please add the header here (and consistently across the other new
`website/docs/**.md` pages).
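For reference, a sketch of the standard ASF source header wrapped in an HTML comment is below; the exact text and line wrapping should match the template the repo's header checker expects:
```markdown
<!--
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership.  The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.  You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing,
 software distributed under the License is distributed on an
 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 KIND, either express or implied.  See the License for the
 specific language governing permissions and limitations
 under the License.
-->
```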
##########
website/docs/user-guide/python/primary-key-tables.md:
##########
@@ -0,0 +1,88 @@
+---
+sidebar_position: 6
+---
+# Primary Key Tables
+
+Primary key tables (KV tables) support upsert, delete, and lookup operations.
+
+## Creating a Primary Key Table
+
+```python
+import pyarrow as pa
+
+pk_schema = pa.schema([
+ pa.field("id", pa.int32()),
+ pa.field("name", pa.string()),
+ pa.field("age", pa.int64()),
+])
+
+fluss_schema = fluss.Schema(pk_schema, primary_keys=["id"])
+table_descriptor = fluss.TableDescriptor(fluss_schema, bucket_count=3)
+
+table_path = fluss.TablePath("fluss", "users")
+await admin.create_table(table_path, table_descriptor, ignore_if_exists=True)
+```
+
+## Upserting Records
+
+```python
+table = await conn.get_table(table_path)
+upsert_writer = table.new_upsert()
+
+await upsert_writer.upsert({"id": 1, "name": "Alice", "age": 25})
+await upsert_writer.upsert({"id": 2, "name": "Bob", "age": 30})
+await upsert_writer.upsert({"id": 3, "name": "Charlie", "age": 35})
+await upsert_writer.flush()
+```
+
+## Updating Records
+
+Upsert with the same primary key to update an existing record.
+
+```python
+await upsert_writer.upsert({
+ "id": 1,
+ "name": "Alice Updated",
+ "age": 26,
+})
+await upsert_writer.flush()
+```
+
+## Deleting Records
+
+```python
+await upsert_writer.delete({"id": 2})
+await upsert_writer.flush()
+```
+
+## Partial Updates
+
+Update only specific columns while preserving others.
+
+```python
+# By column names
+partial_writer = table.new_upsert(columns=["id", "name", "age"])
+
+await partial_writer.upsert({
+ "id": 1, # primary key required
+ "name": "Alice Partial",
+ "age": 27,
+})
+await partial_writer.flush()
+
+# By column indices
+partial_writer_idx = table.new_upsert(column_indices=[0, 1, 3])
Review Comment:
`partial_writer_idx = table.new_upsert(column_indices=[0, 1, 3])` uses an
out-of-range index (this example schema only has 3 columns: indices 0..2).
Update the indices to match the schema.
```suggestion
partial_writer_idx = table.new_upsert(column_indices=[0, 1, 2])
```
##########
website/package.json:
##########
@@ -0,0 +1,43 @@
+{
+ "name": "fluss-clients-website",
+ "version": "0.0.0",
+ "private": true,
+ "scripts": {
+ "docusaurus": "docusaurus",
+ "start": "docusaurus start",
Review Comment:
This repo enforces ASF license headers via CI (SkyWalking Eyes).
`package.json` is JSON (no comments), so it can’t carry a header inline; as-is
it’s likely to fail the header check. Consider adding an appropriate
`paths-ignore` entry for this file in `.licenserc.yaml`, or switching the
tooling/config to exclude `*.json` files.
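As a sketch (assuming the repo's `.licenserc.yaml` follows the usual SkyWalking Eyes layout with a top-level `header` section), the exclusion could look like the following; a `**/*.json` glob would also cover other comment-free JSON files such as `website/tsconfig.json`:
```yaml
# .licenserc.yaml (sketch; merge into the existing header section)
header:
  paths-ignore:
    # JSON cannot carry license-header comments, so skip the check for it
    - 'website/package.json'
    # or, more broadly:
    # - '**/*.json'
```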
##########
website/docs/user-guide/python/log-tables.md:
##########
@@ -0,0 +1,125 @@
+---
+sidebar_position: 5
+---
+# Log Tables
+
+Log tables are append-only tables without primary keys, suitable for event streaming.
+
+## Creating a Log Table
+
+```python
+import pyarrow as pa
+
+schema = pa.schema([
+ pa.field("event_id", pa.int32()),
+ pa.field("event_type", pa.string()),
+ pa.field("timestamp", pa.int64()),
+])
+
+fluss_schema = fluss.Schema(schema)
+table_descriptor = fluss.TableDescriptor(fluss_schema)
+
+table_path = fluss.TablePath("fluss", "events")
+await admin.create_table(table_path, table_descriptor, ignore_if_exists=True)
+```
+
+## Writing to Log Tables
+
+```python
+table = await conn.get_table(table_path)
+append_writer = await table.new_append_writer()
+```
+
+**Write a PyArrow Table:**
+
+```python
+pa_table = pa.Table.from_arrays(
+ [
+ pa.array([1, 2, 3], type=pa.int32()),
+ pa.array(["user_login", "page_view", "checkout"], type=pa.string()),
+        pa.array([1704067200000, 1704067201000, 1704067202000], type=pa.int64()),
+ ],
+ schema=schema,
+)
+append_writer.write_arrow(pa_table)
+```
+
+**Write a PyArrow RecordBatch:**
+
+```python
+batch = pa.RecordBatch.from_arrays(
+ [
+ pa.array([4, 5], type=pa.int32()),
+ pa.array(["signup", "logout"], type=pa.string()),
+ pa.array([1704067203000, 1704067204000], type=pa.int64()),
+ ],
+ schema=schema,
+)
+append_writer.write_arrow_batch(batch)
+```
+
+**Write a single row (dict or list):**
+
+```python
+await append_writer.append({"event_id": 6, "event_type": "click", "timestamp": 1704067205000})
+await append_writer.append([7, "scroll", 1704067206000])
+```
+
+**Write a Pandas DataFrame:**
+
+```python
+import pandas as pd
+
+df = pd.DataFrame({
+ "event_id": [8, 9],
+ "event_type": ["hover", "submit"],
+ "timestamp": [1704067207000, 1704067208000],
+})
+append_writer.write_pandas(df)
+```
+
+**Flush:**
+
+```python
+append_writer.flush()
+```
+
+## Reading from Log Tables
+
+```python
+log_scanner = await table.new_log_scanner()
+
+# Subscribe from earliest to latest
+log_scanner.subscribe(None, None)
+
+# Read as PyArrow Table
+pa_result = log_scanner.to_arrow()
+
+# Or read as Pandas DataFrame
+log_scanner.subscribe(None, None)
+df = log_scanner.to_pandas()
+```
Review Comment:
The Python log-scanner API shown here doesn’t match the actual bindings:
there is no `table.new_log_scanner()`, and `subscribe()` does not accept `None`
values. The correct pattern is to create a scanner via `await
table.new_scan().create_log_scanner()` (record scanner for `poll()`) or `await
table.new_scan().create_batch_scanner()` (for `to_arrow()`/`to_pandas()`), then
call `subscribe(bucket_id, start_offset)` / `subscribe_buckets({...})` with
concrete bucket IDs and offsets.
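For illustration, a minimal sketch of that pattern; the bucket IDs, start offsets, and sync/async split here follow the description above and the surrounding docs, so double-check them against the bindings:
```python
# Batch scanner: materialize the subscribed range as Arrow / pandas.
batch_scanner = await table.new_scan().create_batch_scanner()
batch_scanner.subscribe(0, 0)        # bucket 0, starting at offset 0
pa_result = batch_scanner.to_arrow()

# Record scanner: subscribe concrete buckets, then poll() for records.
log_scanner = await table.new_scan().create_log_scanner()
log_scanner.subscribe_buckets({0: 0, 1: 0, 2: 0})
```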
##########
website/docs/user-guide/python/partitioned-tables.md:
##########
@@ -0,0 +1,78 @@
+---
+sidebar_position: 7
+---
+# Partitioned Tables
+
+Partitioned tables distribute data across partitions based on partition column values, enabling efficient data organization and querying. Both log tables and primary key tables support partitioning.
+
+## Partitioned Log Tables
+
+### Creating a Partitioned Log Table
+
+```python
+import pyarrow as pa
+
+schema = pa.schema([
+ pa.field("event_id", pa.int32()),
+ pa.field("event_type", pa.string()),
+ pa.field("dt", pa.string()),
+ pa.field("region", pa.string()),
+])
+
+fluss_schema = fluss.Schema(schema)
+descriptor = fluss.TableDescriptor(
+ fluss_schema,
+ partition_keys=["dt", "region"],
+ bucket_count=3,
+)
+
+table_path = fluss.TablePath("fluss", "partitioned_events")
+await admin.create_table(table_path, descriptor, ignore_if_exists=True)
+```
+
+### Writing to Partitioned Log Tables
+
+Writing works the same as non-partitioned tables. Include partition column values in each row — the client routes records to the correct partition automatically.
+
+```python
+table = await conn.get_table(table_path)
+writer = await table.new_append_writer()
+
+pa_table = pa.Table.from_arrays(
+ [
+ pa.array([1], type=pa.int32()),
+ pa.array(["user_login"], type=pa.string()),
+ pa.array(["2024-01-15"], type=pa.string()),
+ pa.array(["US"], type=pa.string()),
+ ],
+ schema=schema,
+)
+writer.write_arrow(pa_table)
+writer.flush()
Review Comment:
`AppendWriter.flush()` is async in the Python bindings (returns an
awaitable). The docs currently call `writer.flush()` without awaiting it; this
should be `await writer.flush()` to ensure data is actually flushed before
proceeding.
```suggestion
await writer.flush()
```
##########
website/docs/user-guide/python/example.md:
##########
@@ -0,0 +1,47 @@
+---
+sidebar_position: 2
+---
+# Example
+
+Minimal working examples: connect to Fluss, create a table, write data, and read it back.
+
+```python
+import asyncio
+import pyarrow as pa
+import fluss
+
+async def main():
+ # Connect
+ config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"})
+ conn = await fluss.FlussConnection.connect(config)
+ admin = await conn.get_admin()
+
+ # Create a log table
+ table_path = fluss.TablePath("fluss", "quickstart_python")
+ schema = pa.schema([
+ pa.field("id", pa.int32()),
+ pa.field("name", pa.string()),
+ ])
+ descriptor = fluss.TableDescriptor(fluss.Schema(schema))
+ await admin.create_table(table_path, descriptor, ignore_if_exists=True)
+
+ # Write
+ table = await conn.get_table(table_path)
+ writer = await table.new_append_writer()
+ pa_table = pa.Table.from_arrays(
+        [pa.array([1], type=pa.int32()), pa.array(["hello"], type=pa.string())],
+ schema=schema,
+ )
+ writer.write_arrow(pa_table)
+ writer.flush()
Review Comment:
This Python quickstart uses `writer.flush()` without awaiting it. In the
bindings, `flush()` returns an awaitable, so this should be `await
writer.flush()` to ensure the append completes before scanning.
```suggestion
await writer.flush()
```
##########
website/docs/user-guide/python/primary-key-tables.md:
##########
@@ -0,0 +1,88 @@
+---
+sidebar_position: 6
+---
+# Primary Key Tables
+
+Primary key tables (KV tables) support upsert, delete, and lookup operations.
+
+## Creating a Primary Key Table
+
+```python
+import pyarrow as pa
+
+pk_schema = pa.schema([
+ pa.field("id", pa.int32()),
+ pa.field("name", pa.string()),
+ pa.field("age", pa.int64()),
+])
+
+fluss_schema = fluss.Schema(pk_schema, primary_keys=["id"])
+table_descriptor = fluss.TableDescriptor(fluss_schema, bucket_count=3)
+
+table_path = fluss.TablePath("fluss", "users")
+await admin.create_table(table_path, table_descriptor, ignore_if_exists=True)
+```
+
+## Upserting Records
+
+```python
+table = await conn.get_table(table_path)
+upsert_writer = table.new_upsert()
+
+await upsert_writer.upsert({"id": 1, "name": "Alice", "age": 25})
+await upsert_writer.upsert({"id": 2, "name": "Bob", "age": 30})
+await upsert_writer.upsert({"id": 3, "name": "Charlie", "age": 35})
+await upsert_writer.flush()
+```
Review Comment:
The Python bindings' `UpsertWriter.upsert()` returns a `WriteResultHandle`
synchronously; it is not an async function. The docs currently show `await
upsert_writer.upsert(...)`, which won't work. Either ignore the returned handle
and `await upsert_writer.flush()`, or use `handle = upsert_writer.upsert(...)`
followed by `await handle.wait()` for a per-record ack; the same applies to
`delete()`.
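For illustration, a sketch of the two workable patterns (assuming the `WriteResultHandle.wait()` method named above):
```python
# Option 1: ignore the per-record handles and flush once at the end.
upsert_writer.upsert({"id": 1, "name": "Alice", "age": 25})
upsert_writer.upsert({"id": 2, "name": "Bob", "age": 30})
await upsert_writer.flush()

# Option 2: keep the handle to wait for a per-record acknowledgement.
handle = upsert_writer.upsert({"id": 3, "name": "Charlie", "age": 35})
await handle.wait()

# delete() follows the same pattern: it returns a handle synchronously.
upsert_writer.delete({"id": 2})
await upsert_writer.flush()
```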
##########
website/docs/developer-guide/release.md:
##########
@@ -0,0 +1,181 @@
+# Release
+
+This document describes how to create a release of the Fluss clients (fluss-rust, fluss-python, fluss-cpp) from the [fluss-rust](https://github.com/apache/fluss-rust) repository. It follows the [Apache Fluss release guide](https://fluss.apache.org/community/how-to-release/creating-a-fluss-release/) and the [Apache OpenDAL release guide](https://nightlies.apache.org/opendal/opendal-docs-stable/community/release/).
+
+Publishing software has legal consequences. This guide complements the [Product Release Policy](https://www.apache.org/legal/release-policy.html) and [Release Distribution Policy](https://infra.apache.org/release-distribution.html).
Review Comment:
This Markdown doc is missing the ASF license header (HTML comment) that the
repo’s other docs include and CI enforces. Please add the header at the top of
this file.
##########
website/docusaurus.config.ts:
##########
@@ -0,0 +1,84 @@
+import {themes as prismThemes} from 'prism-react-renderer';
+import type {Config} from '@docusaurus/types';
+import type * as Preset from '@docusaurus/preset-classic';
+
Review Comment:
This new TypeScript config file is missing the standard ASF license header.
The repo CI runs the SkyWalking Eyes header check, and existing source/config
files include this header; please add it here to avoid CI failures.
##########
website/docs/user-guide/python/log-tables.md:
##########
@@ -0,0 +1,125 @@
+---
+sidebar_position: 5
+---
+# Log Tables
+
+Log tables are append-only tables without primary keys, suitable for event streaming.
+
+## Creating a Log Table
+
+```python
+import pyarrow as pa
+
+schema = pa.schema([
+ pa.field("event_id", pa.int32()),
+ pa.field("event_type", pa.string()),
+ pa.field("timestamp", pa.int64()),
+])
+
+fluss_schema = fluss.Schema(schema)
+table_descriptor = fluss.TableDescriptor(fluss_schema)
+
+table_path = fluss.TablePath("fluss", "events")
+await admin.create_table(table_path, table_descriptor, ignore_if_exists=True)
+```
+
+## Writing to Log Tables
+
+```python
+table = await conn.get_table(table_path)
+append_writer = await table.new_append_writer()
+```
+
+**Write a PyArrow Table:**
+
+```python
+pa_table = pa.Table.from_arrays(
+ [
+ pa.array([1, 2, 3], type=pa.int32()),
+ pa.array(["user_login", "page_view", "checkout"], type=pa.string()),
+        pa.array([1704067200000, 1704067201000, 1704067202000], type=pa.int64()),
+ ],
+ schema=schema,
+)
+append_writer.write_arrow(pa_table)
+```
+
+**Write a PyArrow RecordBatch:**
+
+```python
+batch = pa.RecordBatch.from_arrays(
+ [
+ pa.array([4, 5], type=pa.int32()),
+ pa.array(["signup", "logout"], type=pa.string()),
+ pa.array([1704067203000, 1704067204000], type=pa.int64()),
+ ],
+ schema=schema,
+)
+append_writer.write_arrow_batch(batch)
+```
+
+**Write a single row (dict or list):**
+
+```python
+await append_writer.append({"event_id": 6, "event_type": "click", "timestamp": 1704067205000})
+await append_writer.append([7, "scroll", 1704067206000])
+```
+
+**Write a Pandas DataFrame:**
+
+```python
+import pandas as pd
+
+df = pd.DataFrame({
+ "event_id": [8, 9],
+ "event_type": ["hover", "submit"],
+ "timestamp": [1704067207000, 1704067208000],
+})
+append_writer.write_pandas(df)
+```
+
+**Flush:**
+
+```python
+append_writer.flush()
Review Comment:
`append_writer.flush()` is async in the Python bindings (returns an
awaitable). The docs currently call it without `await`, which can lead to reads
happening before data is flushed.
```suggestion
await append_writer.flush()
```
##########
website/babel.config.js:
##########
@@ -0,0 +1,3 @@
+module.exports = {
+ presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
+};
Review Comment:
This Babel config is missing the standard ASF license header comment. Since
CI enforces headers, please add the header here as well.
##########
website/docs/developer-guide/contributing.md:
##########
@@ -0,0 +1,126 @@
+# Contributing
+
+Welcome to the development guide for `fluss-rust`! This project builds the Fluss Rust client and language-specific bindings (Python, C++).
+
+## Prerequisites
Review Comment:
This Markdown doc is missing the ASF license header (HTML comment) that the
repo’s other docs include and CI enforces. Please add the header at the top of
this file.
##########
website/tsconfig.json:
##########
@@ -0,0 +1,6 @@
+{
+ "extends": "@docusaurus/tsconfig",
+ "compilerOptions": {
+ "baseUrl": "."
+ }
+}
Review Comment:
This TypeScript config is plain JSON, which cannot carry a comment-based ASF
header as-is. TypeScript does support JSONC in `tsconfig.json`, so if headers
are required here, consider adding the header as JSONC comments (or adding this
file to the header-check ignore list).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]