merlimat closed pull request #2769: [website] make sure 2.1.1 documentation reflects to the features in 2.1.1 URL: https://github.com/apache/pulsar/pull/2769
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/site2/website/versioned_docs/version-2.1.0-incubating/client-libraries-cpp.md b/site2/website/versioned_docs/version-2.1.0-incubating/client-libraries-cpp.md index ae67ebca74..4f41c72962 100644 --- a/site2/website/versioned_docs/version-2.1.0-incubating/client-libraries-cpp.md +++ b/site2/website/versioned_docs/version-2.1.0-incubating/client-libraries-cpp.md @@ -13,7 +13,7 @@ The Pulsar C++ client has been successfully tested on **MacOS** and **Linux**. ### Install -> Since 2.1.0 release, Pulsar ships pre-built RPM and Debian packages. You can choose download +> Since the 2.1.0 release, Pulsar ships pre-built RPM and Debian packages. You can choose to download > and install those packages instead of building them yourself. #### RPM @@ -24,7 +24,7 @@ The Pulsar C++ client has been successfully tested on **MacOS** and **Linux**. 
| [client-debuginfo]({{pulsar:rpm:client-debuginfo}}) | [asc]({{pulsar:rpm:client-debuginfo}}.asc), [sha512]({{pulsar:rpm:client-debuginfo}}.sha512) | | [client-devel]({{pulsar:rpm:client-devel}}) | [asc]({{pulsar:rpm:client-devel}}.asc), [sha512]({{pulsar:rpm:client-devel}}.sha512) | -To install a RPM package, down the RPM packages and install them using following command: +To install a RPM package, download the RPM packages and install them using the following command: ```bash $ rpm -ivh apache-pulsar-client*.rpm @@ -37,7 +37,7 @@ $ rpm -ivh apache-pulsar-client*.rpm | [client]({{pulsar:deb:client}}) | [asc]({{pulsar:deb:client}}.asc), [sha1]({{pulsar:deb:client}}.sha1), [sha512]({{pulsar:deb:client}}.sha512) | | [client-devel]({{pulsar:deb:client-devel}}) | [asc]({{pulsar:deb:client-devel}}.asc), [sha1]({{pulsar:deb:client-devel}}.sha1), [sha512]({{pulsar:deb:client-devel}}.sha512) | -To install a DEB package, down the DEB packages and install them using following command: +To install a DEB package, download the DEB packages and install them using the following command: ```bash $ apt-install apache-pulsar-client*.deb @@ -107,7 +107,7 @@ This will install the package with the library and headers. To connect to Pulsar using client libraries, you need to specify a Pulsar protocol URL. -Pulsar protocol URLs are assigned to specific clusters, use the pulsar scheme and have a default port of 6650. Here’s an example for localhost: +Pulsar protocol URLs are assigned to specific clusters, use the pulsar URI scheme and have a default port of 6650. 
Here’s an example for localhost: ```http pulsar://localhost:6650 diff --git a/site2/website/versioned_docs/version-2.1.0-incubating/io-quickstart.md b/site2/website/versioned_docs/version-2.1.0-incubating/io-quickstart.md index afa8e31a6b..a3e8c37176 100644 --- a/site2/website/versioned_docs/version-2.1.0-incubating/io-quickstart.md +++ b/site2/website/versioned_docs/version-2.1.0-incubating/io-quickstart.md @@ -1,6 +1,6 @@ --- id: version-2.1.0-incubating-io-quickstart -title: "Tutorial: Connecting Pulsar with Apache Cassandra" +title: Tutorial: Connecting Pulsar with Apache Cassandra sidebar_label: Getting started original_id: io-quickstart --- diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/adaptors-kafka.md b/site2/website/versioned_docs/version-2.1.1-incubating/adaptors-kafka.md index 3e6de037f0..74eff85847 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/adaptors-kafka.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/adaptors-kafka.md @@ -226,8 +226,6 @@ You can configure Pulsar authentication provider directly from the Kafka propert | Config property | Default | Notes | |:---------------------------------------|:--------|:---------------------------------------------------------------------------------------| | [`pulsar.authentication.class`](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ClientConfiguration.html#setAuthentication-org.apache.pulsar.client.api.Authentication-) | | Configure to auth provider. Eg. 
`org.apache.pulsar.client.impl.auth.AuthenticationTls` | -| [`pulsar.authentication.params.map`](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ClientConfiguration.html#setAuthentication-java.lang.String-java.util.Map-) | | Map which represents parameters for the Authentication-Plugin | -| [`pulsar.authentication.params.string`](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ClientConfiguration.html#setAuthentication-java.lang.String-java.lang.String-) | | String which represents parameters for the Authentication-Plugin, Eg. `key1:val1,key2:val2` | | [`pulsar.use.tls`](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ClientConfiguration.html#setUseTls-boolean-) | `false` | Enable TLS transport encryption | | [`pulsar.tls.trust.certs.file.path`](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ClientConfiguration.html#setTlsTrustCertsFilePath-java.lang.String-) | | Path for the TLS trust certificate store | | [`pulsar.tls.allow.insecure.connection`](http://pulsar.apache.org/api/client/org/apache/pulsar/client/api/ClientConfiguration.html#setTlsAllowInsecureConnection-boolean-) | `false` | Accept self-signed certificates from brokers | diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/cookbooks-tiered-storage.md b/site2/website/versioned_docs/version-2.1.1-incubating/cookbooks-tiered-storage.md index 03b74da04b..24b6beaf13 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/cookbooks-tiered-storage.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/cookbooks-tiered-storage.md @@ -7,11 +7,6 @@ original_id: cookbooks-tiered-storage Pulsar's **Tiered Storage** feature allows older backlog data to be offloaded to long term storage, thereby freeing up space in BookKeeper and reducing storage costs. This cookbook walks you through using tiered storage in your Pulsar cluster. 
-Tiered storage currently uses [Apache Jclouds](https://jclouds.apache.org) to supports -[Amazon S3](https://aws.amazon.com/s3/) and [Google Cloud Storage](https://cloud.google.com/storage/)(GCS for short) -for long term storage. With Jclouds, it is easy to add support for more -[cloud storage providers](https://jclouds.apache.org/reference/providers/#blobstore-providers) in the future. - ## When should I use Tiered Storage? Tiered storage should be used when you have a topic for which you want to keep a very long backlog for a long time. For example, if you have a topic containing user actions which you use to train your recommendation systems, you may want to keep that data for a long time, so that if you change your recommendation algorithm you can rerun it against your full user history. @@ -24,62 +19,44 @@ A topic in Pulsar is backed by a log, known as a managed ledger. This log is com The Tiered Storage offloading mechanism takes advantage of this segment oriented architecture. When offloading is requested, the segments of the log are copied, one-by-one, to tiered storage. All segments of the log, apart from the segment currently being written to can be offloaded. -On the broker, the administrator must configure the bucket and credentials for the cloud storage service. -The configured bucket must exist before attempting to offload. If it does not exist, the offload operation will fail. - -Pulsar uses multi-part objects to upload the segment data. It is possible that a broker could crash while uploading the data. -We recommend you add a life cycle rule your bucket to expire incomplete multi-part upload after a day or two to avoid -getting charged for incomplete uploads. +## Amazon S3 -## Configuring the offload driver +Tiered storage currently supports S3 for long term storage. On the broker, the administrator must configure a S3 bucket and the AWS region where the bucket exists. Offloaded data will be placed into this bucket. 
-Offloading is configured in ```broker.conf```. +The configured S3 bucket must exist before attempting to offload. If it does not exist, the offload operation will fail. -At a minimum, the administrator must configure the driver, the bucket and the authenticating credentials. -There is also some other knobs to configure, like the bucket region, the max block size in backed storage, etc. +Pulsar users multipart objects to update the segment data. It is possible that a broker could crash while uploading the data. We recommend you add a lifecycle rule your S3 bucket to expire incomplete multipart upload after a day or two to avoid getting charged for incomplete uploads. -Currently we support driver of types: +### Configuring the broker -- `aws-s3`: [Simple Cloud Storage Service](https://aws.amazon.com/s3/) -- `google-cloud-storage`: [Google Cloud Storage](https://cloud.google.com/storage/) +Offloading is configured in ```broker.conf```. -> Driver names are case-insensitive for driver's name. There is a third driver type, `s3`, which is identical to `aws-s3`, -> though it requires that you specify an endpoint url using `s3ManagedLedgerOffloadServiceEndpoint`. This is useful if -> using a S3 compatible data store, other than AWS. +At a minimum, the user must configure the driver, the region and the bucket. ```conf -managedLedgerOffloadDriver=aws-s3 +managedLedgerOffloadDriver=S3 +s3ManagedLedgerOffloadRegion=eu-west-3 +s3ManagedLedgerOffloadBucket=pulsar-topic-offload ``` -### "aws-s3" Driver configuration +It is also possible to specify the s3 endpoint directly, using `s3ManagedLedgerOffloadServiceEndpoint`. This is useful if you are using a non-AWS storage service which provides an S3 compatible API. -#### Bucket and Region +> If the endpoint is specified directly, then the region must _not_ be set. -Buckets are the basic containers that hold your data. -Everything that you store in Cloud Storage must be contained in a bucket. 
-You can use buckets to organize your data and control access to your data, -but unlike directories and folders, you cannot nest buckets. +> The broker.conf of all brokers must have the same configuration for driver, region and bucket for offload to avoid data becoming unavailable as topics move from one broker to another. -```conf -s3ManagedLedgerOffloadBucket=pulsar-topic-offload -``` +Pulsar also provides some knobs to configure the size of requests sent to S3. -Bucket Region is the region where bucket located. Bucket Region is not a required -but a recommended configuration. If it is not configured, It will use the default region. +- `s3ManagedLedgerOffloadMaxBlockSizeInBytes` configures the maximum size of a "part" sent during a multipart upload. This cannot be smaller than 5MB. Default is 64MB. +- `s3ManagedLedgerOffloadReadBufferSizeInBytes` configures the block size for each individual read when reading back data from S3. Default is 1MB. -With AWS S3, the default region is `US East (N. Virginia)`. Page -[AWS Regions and Endpoints](https://docs.aws.amazon.com/general/latest/gr/rande.html) contains more information. +In both cases, these should not be touched unless you know what you are doing. -```conf -s3ManagedLedgerOffloadRegion=eu-west-3 -``` +> The broker must be rebooted for any changes in the configuration to take effect. -#### Authentication with AWS +### Authenticating with S3 -To be able to access AWS S3, you need to authenticate with AWS S3. -Pulsar does not provide any direct means of configuring authentication for AWS S3, -but relies on the mechanisms supported by the -[DefaultAWSCredentialsProviderChain](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html). +To be able to access S3, you need to authenticate with S3. 
Pulsar does not provide any direct means of configuring authentication for S3, but relies on the mechanisms supported by the [DefaultAWSCredentialsProviderChain](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html). Once you have created a set of credentials in the AWS IAM console, they can be configured in a number of ways. @@ -111,68 +88,6 @@ If you are running in EC2 you can also use instance profile credentials, provide > The broker must be rebooted for credentials specified in pulsar_env to take > effect. -#### Configuring the size of block read/write - -Pulsar also provides some knobs to configure the size of requests sent to AWS S3. - -- ```s3ManagedLedgerOffloadMaxBlockSizeInBytes``` configures the maximum size of - a "part" sent during a multipart upload. This cannot be smaller than 5MB. Default is 64MB. -- ```s3ManagedLedgerOffloadReadBufferSizeInBytes``` configures the block size for - each individual read when reading back data from AWS S3. Default is 1MB. - -In both cases, these should not be touched unless you know what you are doing. - -### "google-cloud-storage" Driver configuration - -Buckets are the basic containers that hold your data. Everything that you store in -Cloud Storage must be contained in a bucket. You can use buckets to organize your data and -control access to your data, but unlike directories and folders, you cannot nest buckets. - -```conf -gcsManagedLedgerOffloadBucket=pulsar-topic-offload -``` - -Bucket Region is the region where bucket located. Bucket Region is not a required but -a recommended configuration. If it is not configured, It will use the default region. - -Regarding GCS, buckets are default created in the `us multi-regional location`, -page [Bucket Locations](https://cloud.google.com/storage/docs/bucket-locations) contains more information. 
- -```conf -gcsManagedLedgerOffloadRegion=europe-west3 -``` - -#### Authentication with GCS - -The administrator needs to configure `gcsManagedLedgerOffloadServiceAccountKeyFile` in `broker.conf` -for the broker to be able to access the GCS service. `gcsManagedLedgerOffloadServiceAccountKeyFile` is -a Json file, containing the GCS credentials of a service account. -[Service Accounts section of this page](https://support.google.com/googleapi/answer/6158849) contains -more information of how to create this key file for authentication. More information about google cloud IAM -is available [here](https://cloud.google.com/storage/docs/access-control/iam). - -Usually these are the steps to create the authentication file: -1. Open the API Console Credentials page. -2. If it's not already selected, select the project that you're creating credentials for. -3. To set up a new service account, click New credentials and then select Service account key. -4. Choose the service account to use for the key. -5. Download the service account's public/private key as a JSON file that can be loaded by a Google API client library. - -```conf -gcsManagedLedgerOffloadServiceAccountKeyFile="/Users/hello/Downloads/project-804d5e6a6f33.json" -``` - -#### Configuring the size of block read/write - -Pulsar also provides some knobs to configure the size of requests sent to GCS. - -- ```gcsManagedLedgerOffloadMaxBlockSizeInBytes``` configures the maximum size of a "part" sent - during a multipart upload. This cannot be smaller than 5MB. Default is 64MB. -- ```gcsManagedLedgerOffloadReadBufferSizeInBytes``` configures the block size for each individual - read when reading back data from GCS. Default is 1MB. - -In both cases, these should not be touched unless you know what you are doing. - ## Configuring offload to run automatically Namespace policies can be configured to offload data automatically once a threshold is reached. 
The threshold is based on the size of data that the topic has stored on the pulsar cluster. Once the topic reaches the threshold, an offload operation will be triggered. Setting a negative value to the threshold will disable automatic offloading. Setting the threshold to 0 will cause the broker to offload data as soon as it possiby can. diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/deploy-bare-metal.md b/site2/website/versioned_docs/version-2.1.1-incubating/deploy-bare-metal.md index 4de56d94b8..12612cdc65 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/deploy-bare-metal.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/deploy-bare-metal.md @@ -336,7 +336,7 @@ Create a ExclamationFunction `exclamation`. ```bash bin/pulsar-admin functions create \ --jar examples/api-examples.jar \ - --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \ + --className org.apache.pulsar.functions.api.examples.ExclamationFunction \ --inputs persistent://public/default/exclamation-input \ --output persistent://public/default/exclamation-output \ --tenant public \ @@ -347,7 +347,7 @@ bin/pulsar-admin functions create \ Check if the function is running as expected by [triggering](functions-deploying.md#triggering-pulsar-functions) the function. 
```bash -bin/pulsar-admin functions trigger --name exclamation --trigger-value "hello world" +bin/pulsar-admin functions trigger --name exclamation --triggerValue "hello world" ``` You will see output as below: diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/functions-api.md b/site2/website/versioned_docs/version-2.1.1-incubating/functions-api.md index 197a1fb6a8..d546cc0d4d 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/functions-api.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/functions-api.md @@ -48,13 +48,13 @@ Deploying Pulsar Functions is handled by the [`pulsar-admin`](reference-pulsar-a ```bash $ bin/pulsar-admin functions localrun \ --py sanitizer.py \ # The Python file with the function's code - --classname sanitizer \ # The class or function holding the processing logic + --className sanitizer \ # The class or function holding the processing logic --tenant public \ # The function's tenant (derived from the topic name by default) --namespace default \ # The function's namespace (derived from the topic name by default) --name sanitizer-function \ # The name of the function (the class name by default) --inputs dirty-strings-in \ # The input topic(s) for the function --output clean-strings-out \ # The output topic for the function - --log-topic sanitizer-logs # The topic to which all functions logs are published + --logTopic sanitizer-logs # The topic to which all functions logs are published ``` For instructions on running functions in your Pulsar cluster, see the [Deploying Pulsar Functions](functions-deploying.md) guide. @@ -86,16 +86,7 @@ class DisplayFunctionName(Function): return "The function processing this message has the name {0}".format(function_name) ``` -### Functions, Messages and Message Types - -Pulsar Functions can take byte arrays as inputs and spit out byte arrays as output. 
However in languages that support typed interfaces(just Java at the moment) one can write typed Functions as well. In this scenario, there are two ways one can bind messages to types. -* [Schema Registry](#Schema-Registry) -* [SerDe](#SerDe) - -### Schema Registry -Pulsar has a built in [Schema Registry](concepts-schema-registry) and comes bundled with a variety of popular schema types(avro, json and protobuf). Pulsar Functions can leverage existing schema information from input topics to derive the input type. The same applies for output topic as well. - -### SerDe +### Serialization and deserialization (SerDe) SerDe stands for **Ser**ialization and **De**serialization. All Pulsar Functions use SerDe for message handling. How SerDe works by default depends on the language you're using for a particular function: @@ -129,7 +120,7 @@ When you run or update Pulsar Functions created using the [SDK](#available-apis) $ bin/pulsar-admin functions create \ --name word-filter \ # Other function configs - --user-config '{"forbidden-word":"rosebud"}' + --userConfig '{"forbidden-word":"rosebud"}' ``` If the function were a Python function, that config value could be accessed like this: @@ -252,10 +243,11 @@ The {@inject: javadoc:Context:/client/org/apache/pulsar/functions/api/Context} i ```java public interface Context { - Record<?> getCurrentRecord(); - Collection<String> getInputTopics(); - String getOutputTopic(); - String getOutputSchemaType(); + byte[] getMessageId(); + String getTopicName(); + Collection<String> getSourceTopics(); + String getSinkTopic(); + String getOutputSerdeClassName(); String getTenant(); String getNamespace(); String getFunctionName(); @@ -263,16 +255,13 @@ public interface Context { String getInstanceId(); String getFunctionVersion(); Logger getLogger(); - void incrCounter(String key, long amount); - long getCounter(String key); - void putState(String key, ByteBuffer value); - ByteBuffer getState(String key); - Map<String, Object> 
getUserConfigMap(); - Optional<Object> getUserConfigValue(String key); - Object getUserConfigValueOrDefault(String key, Object defaultValue); + Map<String, String> getUserConfigMap(); + Optional<String> getUserConfigValue(String key); + String getUserConfigValueOrDefault(String key, String default); void recordMetric(String metricName, double value); - <O> CompletableFuture<Void> publish(String topicName, O object, String schemaOrSerdeClassName); + <O> CompletableFuture<Void> publish(String topicName, O object, String serDeClassName); <O> CompletableFuture<Void> publish(String topicName, O object); + CompletableFuture<Void> ack(byte[] messageId, String topic); } ``` @@ -394,7 +383,7 @@ Here's an example [`create`](reference-pulsar-admin.md#create-1) operation: ```bash $ bin/pulsar-admin functions create \ --jar /path/to/your.jar \ - --output-serde-classname com.example.serde.TweetSerde \ + --outputSerdeClassName com.example.serde.TweetSerde \ # Other function attributes ``` @@ -432,8 +421,8 @@ If you want your function to produce logs, you need to specify a log topic when ```bash $ bin/pulsar-admin functions create \ --jar my-functions.jar \ - --classname my.package.LoggingFunction \ - --log-topic persistent://public/default/logging-function-logs \ + --className my.package.LoggingFunction \ + --logTopic persistent://public/default/logging-function-logs \ # Other function configs ``` @@ -446,7 +435,7 @@ The Java SDK's [`Context`](#context) object enables you to access key/value pair ```bash $ bin/pulsar-admin functions create \ # Other function configs - --user-config '{"word-of-the-day":"verdure"}' + --userConfig '{"word-of-the-day":"verdure"}' ``` To access that value in a Java function: @@ -523,7 +512,7 @@ Writing Pulsar Functions in Python entails implementing one of two things: ### Getting started -Regardless of which [deployment mode](functions-deploying.md) you're using, 'pulsar-client' python library has to installed on any machine that's running Pulsar 
Functions written in Python. +Regardless of which [deployment mode](functions-deploying.md) you're using, you'll need to install the following Python libraries on any machine that's running Pulsar Functions written in Python: That could be your local machine for [local run mode](functions-deploying.md#local-run-mode) or a machine running a Pulsar [broker](reference-terminology.md#broker) for [cluster mode](functions-deploying.md#cluster-mode). To install those libraries using pip: @@ -573,9 +562,7 @@ The [`Context`](https://github.com/apache/incubator-pulsar/blob/master/pulsar-cl Method | What it provides :------|:---------------- `get_message_id` | The message ID of the message being processed -`get_current_message_topic_name` | The topic of the message being currently being processed -`get_function_tenant` | The tenant under which the current Pulsar Function runs under -`get_function_namespace` | The namespace under which the current Pulsar Function runs under +`get_topic_name` | The input topic of the message being processed `get_function_name` | The name of the current Pulsar Function `get_function_id` | The ID of the current Pulsar Function `get_instance_id` | The ID of the current Pulsar Functions instance @@ -598,9 +585,9 @@ $ bin/pulsar-admin functions create \ --namespace default \ --name my_function \ --py my_function.py \ - --classname my_function.MyFunction \ - --custom-serde-inputs '{"input-topic-1":"Serde1","input-topic-2":"Serde2"}' \ - --output-serde-classname Serde3 \ + --className my_function.MyFunction \ + --customSerdeInputs '{"input-topic-1":"Serde1","input-topic-2":"Serde2"}' \ + --outputSerdeClassName Serde3 \ --output output-topic-1 ``` @@ -673,8 +660,8 @@ If you want your function to produce logs on a Pulsar topic, you need to specify ```bash $ bin/pulsar-admin functions create \ --py logging_function.py \ - --classname logging_function.LoggingFunction \ - --log-topic logging-function-logs \ + --className logging_function.LoggingFunction \ 
+ --logTopic logging-function-logs \ # Other function configs ``` @@ -687,7 +674,7 @@ The Python SDK's [`Context`](#context) object enables you to access key/value pa ```bash $ bin/pulsar-admin functions create \ # Other function configs \ - --user-config '{"word-of-the-day":"verdure"}' + --userConfig '{"word-of-the-day":"verdure"}' ``` To access that value in a Python function: diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/functions-deploying.md b/site2/website/versioned_docs/version-2.1.1-incubating/functions-deploying.md index a467727186..c5d005d4f9 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/functions-deploying.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/functions-deploying.md @@ -44,7 +44,7 @@ When managing Pulsar Functions, you'll need to specify a variety of information Parameter | Default :---------|:------- -Function name | Whichever value is specified for the class name (minus org, library, etc.). The flag `--classname org.example.MyFunction`, for example, would give the function a name of `MyFunction`. +Function name | Whichever value is specified for the class name (minus org, library, etc.). The flag `--className org.example.MyFunction`, for example, would give the function a name of `MyFunction`. Tenant | Derived from the input topics' names. If the input topics are under the `marketing` tenant---i.e. the topic names have the form `persistent://marketing/{namespace}/{topicName}`---then the tenant will be `marketing`. Namespace | Derived from the input topics' names. If the input topics are under the `asia` namespace under the `marketing` tenant---i.e. the topic names have the form `persistent://marketing/asia/{topicName}`, then the namespace will be `asia`. Output topic | `{input topic}-{function name}-output`. A function with an input topic name of `incoming` and a function name of `exclamation`, for example, would have an output topic of `incoming-exclamation-output`. 
@@ -59,7 +59,7 @@ Take this `create` command: ```bash $ bin/pulsar-admin functions create \ --jar my-pulsar-functions.jar \ - --classname org.example.MyFunction \ + --className org.example.MyFunction \ --inputs my-function-input-topic1,my-function-input-topic2 ``` @@ -72,7 +72,7 @@ If you run a Pulsar Function in **local run** mode, it will run on the machine f ```bash $ bin/pulsar-admin functions localrun \ --py myfunc.py \ - --classname myfunc.SomeFunction \ + --className myfunc.SomeFunction \ --inputs persistent://public/default/input-1 \ --output persistent://public/default/output-1 ``` @@ -81,7 +81,7 @@ By default, the function will connect to a Pulsar cluster running on the same ma ```bash $ bin/pulsar-admin functions localrun \ - --broker-service-url pulsar://my-cluster-host:6650 \ + --brokerServiceUrl pulsar://my-cluster-host:6650 \ # Other function parameters ``` @@ -92,7 +92,7 @@ When you run a Pulsar Function in **cluster mode**, the function code will be up ```bash $ bin/pulsar-admin functions create \ --py myfunc.py \ - --classname myfunc.SomeFunction \ + --className myfunc.SomeFunction \ --inputs persistent://public/default/input-1 \ --output persistent://public/default/output-1 ``` @@ -104,7 +104,7 @@ You can use the [`update`](reference-pulsar-admin.md#update-1) command to update ```bash $ bin/pulsar-admin functions update \ --py myfunc.py \ - --classname myfunc.SomeFunction \ + --className myfunc.SomeFunction \ --inputs persistent://public/default/new-input-topic \ --output persistent://public/default/new-output-topic ``` @@ -144,7 +144,7 @@ And here's the corresponding update command: ```bash $ bin/pulsar-admin functions update \ - --function-config-file function-config.yaml + --functionConfigFile function-config.yaml ``` ### Function instance resources @@ -162,7 +162,7 @@ Here's an example function creation command that allocates 8 cores, 8 GB of RAM, ```bash $ bin/pulsar-admin functions create \ --jar target/my-functions.jar \ - --classname 
org.example.functions.MyFunction \ + --className org.example.functions.MyFunction \ --cpu 8 \ --ram 8589934592 \ --disk 10737418240 @@ -193,7 +193,7 @@ $ bin/pulsar-admin functions create \ --namespace default \ --name myfunc \ --py myfunc.py \ - --classname myfunc \ + --className myfunc \ --inputs persistent://public/default/in \ --output persistent://public/default/out ``` @@ -213,7 +213,7 @@ $ bin/pulsar-admin functions trigger \ --tenant public \ --namespace default \ --name myfunc \ - --trigger-value "hello world" + --triggerValue "hello world" ``` The consumer listening on the output topic should then produce this in its logs: diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/functions-guarantees.md b/site2/website/versioned_docs/version-2.1.1-incubating/functions-guarantees.md index d834f7ab19..35823ca372 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/functions-guarantees.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/functions-guarantees.md @@ -19,7 +19,7 @@ You can set the processing guarantees for a Pulsar Function when you create the ```bash $ bin/pulsar-admin functions create \ - --processing-guarantees EFFECTIVELY_ONCE \ + --processingGuarantees EFFECTIVELY_ONCE \ # Other function configs ``` @@ -37,6 +37,6 @@ You can change the processing guarantees applied to a function once it's already ```bash $ bin/pulsar-admin functions update \ - --processing-guarantees ATMOST_ONCE \ + --processingGuarantees ATMOST_ONCE \ # Other function configs ``` diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/functions-overview.md b/site2/website/versioned_docs/version-2.1.1-incubating/functions-overview.md index fe76f65c55..1853604048 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/functions-overview.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/functions-overview.md @@ -98,7 +98,7 @@ public class WordCountFunction implements Function<String, Void> { ```bash $ 
bin/pulsar-admin functions create \ --jar target/my-jar-with-dependencies.jar \ - --classname org.example.functions.WordCountFunction \ + --className org.example.functions.WordCountFunction \ --tenant public \ --namespace default \ --name word-count \ @@ -149,7 +149,7 @@ $ bin/pulsar-functions localrun \ --inputs persistent://public/default/test_src \ --output persistent://public/default/test_result \ --jar examples/api-examples.jar \ - --classname org.apache.pulsar.functions.api.examples.ExclamationFunction + --className org.apache.pulsar.functions.api.examples.ExclamationFunction ``` ## Fully Qualified Function Name (FQFN) @@ -173,7 +173,7 @@ If you're supplying a YAML configuration, you must specify a path to the file on ```bash $ bin/pulsar-admin functions create \ - --function-config-file ./my-function.yaml + --functionConfigFile ./my-function.yaml ``` And here's an example `my-function.yaml` file: @@ -278,7 +278,7 @@ If you run a Pulsar Function in **local run** mode, it will run on the machine f ```bash $ bin/pulsar-admin functions localrun \ --py myfunc.py \ - --classname myfunc.SomeFunction \ + --className myfunc.SomeFunction \ --inputs persistent://public/default/input-1 \ --output persistent://public/default/output-1 ``` @@ -287,7 +287,7 @@ By default, the function will connect to a Pulsar cluster running on the same ma ```bash $ bin/pulsar-admin functions localrun \ - --broker-service-url pulsar://my-cluster-host:6650 \ + --brokerServiceUrl pulsar://my-cluster-host:6650 \ # Other function parameters ``` @@ -298,7 +298,7 @@ When you run a Pulsar Function in **cluster mode**, the function code will be up ```bash $ bin/pulsar-admin functions create \ --py myfunc.py \ - --classname myfunc.SomeFunction \ + --className myfunc.SomeFunction \ --inputs persistent://public/default/input-1 \ --output persistent://public/default/output-1 ``` @@ -317,7 +317,7 @@ $ bin/pulsar-admin functions create \ --tenant public \ --namespace default \ --py func.py \ - 
--classname func.ParallelFunction \ + --className func.ParallelFunction \ --parallelism 5 ``` @@ -336,7 +336,7 @@ Here's an example function creation command that allocates 8 cores, 8 GB of RAM, ```bash $ bin/pulsar-admin functions create \ --jar target/my-functions.jar \ - --classname org.example.functions.MyFunction \ + --className org.example.functions.MyFunction \ --cpu 8 \ --ram 8589934592 \ --disk 10737418240 @@ -351,7 +351,7 @@ Pulsar Functions created using the [Pulsar Functions SDK](#the-pulsar-functions- ```bash $ bin/pulsar-admin functions create \ --name my-func-1 \ - --log-topic persistent://public/default/my-func-1-log \ + --logTopic persistent://public/default/my-func-1-log \ # Other configs ``` @@ -379,7 +379,7 @@ Here's an example of passing a user configuration to a function: ```bash $ bin/pulsar-admin functions create \ - --user-config '{"key-1":"value-1","key-2","value-2"}' \ + --userConfig '{"key-1":"value-1","key-2","value-2"}' \ # Other configs ``` @@ -417,7 +417,7 @@ $ bin/pulsar-admin functions trigger \ --tenant public \ --namespace default \ --name reverse-func \ - --trigger-value "snoitcnuf raslup ot emoclew" + --triggerValue "snoitcnuf raslup ot emoclew" ``` That should return `welcome to pulsar functions` as the console output. 
@@ -439,7 +439,7 @@ This command, for example, would run a function in [cluster mode](#cluster-run-m ```bash $ bin/pulsar-admin functions create \ --name my-effectively-once-function \ - --processing-guarantees EFFECTIVELY_ONCE \ + --processingGuarantees EFFECTIVELY_ONCE \ # Other function configs ``` diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/functions-quickstart.md b/site2/website/versioned_docs/version-2.1.1-incubating/functions-quickstart.md index 10d7e2a6ca..c17626be87 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/functions-quickstart.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/functions-quickstart.md @@ -49,7 +49,7 @@ A JAR file containing this and several other functions (written in Java) is incl ```bash $ bin/pulsar-admin functions localrun \ --jar examples/api-examples.jar \ - --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \ + --className org.apache.pulsar.functions.api.examples.ExclamationFunction \ --inputs persistent://public/default/exclamation-input \ --output persistent://public/default/exclamation-output \ --name exclamation @@ -105,7 +105,7 @@ This command, for example, would deploy the same exclamation function we ran loc ```bash $ bin/pulsar-admin functions create \ --jar examples/api-examples.jar \ - --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \ + --className org.apache.pulsar.functions.api.examples.ExclamationFunction \ --inputs persistent://public/default/exclamation-input \ --output persistent://public/default/exclamation-output \ --name exclamation @@ -172,7 +172,7 @@ As we can see, the parallelism of the function is 1, meaning that only one insta ```bash $ bin/pulsar-admin functions update \ --jar examples/api-examples.jar \ - --classname org.apache.pulsar.functions.api.examples.ExclamationFunction \ + --className org.apache.pulsar.functions.api.examples.ExclamationFunction \ --inputs 
persistent://public/default/exclamation-input \ --output persistent://public/default/exclamation-output \ --tenant public \ @@ -236,7 +236,7 @@ Here, the `process` method defines the processing logic of the Pulsar Function. ```bash $ bin/pulsar-admin functions create \ --py reverse.py \ - --class-name reverse \ + --className reverse \ --inputs persistent://public/default/backwards \ --output persistent://public/default/forwards \ --tenant public \ @@ -251,7 +251,7 @@ $ bin/pulsar-admin functions trigger \ --name reverse \ --tenant public \ --namespace default \ - --trigger-value "sdrawrof won si tub sdrawkcab saw gnirts sihT" + --triggerValue "sdrawrof won si tub sdrawkcab saw gnirts sihT" ``` You should get this output: diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/io-managing.md b/site2/website/versioned_docs/version-2.1.1-incubating/io-managing.md index fb40833262..7d20afbc06 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/io-managing.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/io-managing.md @@ -55,22 +55,22 @@ Pulsar connectors can be managed using the [`source`](reference-pulsar-admin.md# You can submit a source to be run in an existing Pulsar cluster using a command of this form: ```bash -$ ./bin/pulsar-admin source create --classname <classname> --archive <jar-location> --tenant <tenant> --namespace <namespace> --name <source-name> --destination-topic-name <output-topic> +$ ./bin/pulsar-admin source create --className <classname> --jar <jar-location> --tenant <tenant> --namespace <namespace> --name <source-name> --destinationTopicName <output-topic> ``` Here’s an example command: ```bash -bin/pulsar-admin source create --classname org.apache.pulsar.io.twitter.TwitterFireHose --archive ~/application.jar --tenant test --namespace ns1 --name twitter-source --destination-topic-name twitter_data +bin/pulsar-admin source create --className org.apache.pulsar.io.twitter.TwitterFireHose --jar 
~/application.jar --tenant test --namespace ns1 --name twitter-source --destinationTopicName twitter_data ``` Instead of submitting a source to run on an existing Pulsar cluster, you alternatively can run a source as a process on your local machine: ```bash -bin/pulsar-admin source localrun --classname org.apache.pulsar.io.twitter.TwitterFireHose --archive ~/application.jar --tenant test --namespace ns1 --name twitter-source --destination-topic-name twitter_data +bin/pulsar-admin source localrun --className org.apache.pulsar.io.twitter.TwitterFireHose --jar ~/application.jar --tenant test --namespace ns1 --name twitter-source --destinationTopicName twitter_data ``` -If you are submitting a built-in source, you don't need to specify `--classname` and `--archive`. +If you are submitting a built-in source, you don't need to specify `--className` and `--jar`. You can simply specify the source type `--source-type`. The command to submit a built-in source is in following form: @@ -79,7 +79,7 @@ in following form: --tenant <tenant> \ --namespace <namespace> \ --name <source-name> \ - --destination-topic-name <input-topics> \ + --destinationTopicName <input-topics> \ --source-type <source-type> ``` @@ -90,7 +90,7 @@ Here's an example to submit a Kafka source: --tenant test-tenant \ --namespace test-namespace \ --name test-kafka-source \ - --destination-topic-name pulsar_sink_topic \ + --destinationTopicName pulsar_sink_topic \ --source-type kafka ``` @@ -99,22 +99,22 @@ Here's an example to submit a Kafka source: You can submit a sink to be run in an existing Pulsar cluster using a command of this form: ```bash -./bin/pulsar-admin sink create --classname <classname> --archive <jar-location> --tenant test --namespace <namespace> --name <sink-name> --inputs <input-topics> +./bin/pulsar-admin sink create --className <classname> --jar <jar-location> --tenant test --namespace <namespace> --name <sink-name> --inputs <input-topics> ``` Here’s an example command: ```bash 
-./bin/pulsar-admin sink create --classname org.apache.pulsar.io.cassandra --archive ~/application.jar --tenant test --namespace ns1 --name cassandra-sink --inputs test_topic +./bin/pulsar-admin sink create --className org.apache.pulsar.io.cassandra --jar ~/application.jar --tenant test --namespace ns1 --name cassandra-sink --inputs test_topic ``` Instead of submitting a sink to run on an existing Pulsar cluster, you alternatively can run a sink as a process on your local machine: ```bash -./bin/pulsar-admin sink localrun --classname org.apache.pulsar.io.cassandra --archive ~/application.jar --tenant test --namespace ns1 --name cassandra-sink --inputs test_topic +./bin/pulsar-admin sink localrun --className org.apache.pulsar.io.cassandra --jar ~/application.jar --tenant test --namespace ns1 --name cassandra-sink --inputs test_topic ``` -If you are submitting a built-in sink, you don't need to specify `--classname` and `--archive`. +If you are submitting a built-in sink, you don't need to specify `--className` and `--jar`. You can simply specify the sink type `--sink-type`. 
The command to submit a built-in sink is in following form: diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/io-quickstart.md b/site2/website/versioned_docs/version-2.1.1-incubating/io-quickstart.md index 3aa5c5ba19..bb06b8b396 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/io-quickstart.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/io-quickstart.md @@ -235,7 +235,7 @@ bin/pulsar-admin sink create \ --namespace default \ --name cassandra-test-sink \ --sink-type cassandra \ - --sink-config-file examples/cassandra-sink.yml \ + --sinkConfigFile examples/cassandra-sink.yml \ --inputs test_cassandra ``` diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/reference-pulsar-admin.md b/site2/website/versioned_docs/version-2.1.1-incubating/reference-pulsar-admin.md index f8cd205c8c..e10f9a39ce 100644 --- a/site2/website/versioned_docs/version-2.1.1-incubating/reference-pulsar-admin.md +++ b/site2/website/versioned_docs/version-2.1.1-incubating/reference-pulsar-admin.md @@ -307,36 +307,24 @@ Options |`--cpu`|The CPU to allocate to each function instance (in number of cores)|| |`--ram`|The RAM to allocate to each function instance (in bytes)|| |`--disk`|The disk space to allocate to each function instance (in bytes)|| -|`--auto-ack`|Let the functions framework manage acking|| -|`--subs-name`|Pulsar source subscription name if user wants a specific subscription-name for input-topic consumer|| -|`--broker-service-url `|The URL of the Pulsar broker|| -|`--classname`|The name of the function’s class|| -|`--custom-serde-inputs`|A map of the input topic to SerDe name|| -|`--custom-schema-inputs`|A map of the input topic to Schema class name|| -|`--client-auth-params`|Client Authentication Params|| -|`--function-config-file`|The path of the YAML config file used to configure the function|| -|`--hostname-verification-enabled`|Enable Hostname verification|| -|`--instance-id-offset`|Instance ids will be assigned 
starting from this offset|| +|`--brokerServiceUrl `|The URL of the Pulsar broker|| +|`--className`|The name of the function’s class|| +|`--customSerdeInputs`|A map of the input topic to SerDe name|| +|`--functionConfigFile`|The path of the YAML config file used to configure the function|| |`--inputs`|The input topics for the function (as a comma-separated list if more than one topic is desired)|| -|`--log-topic`|The topic to which logs from this function are published|| +|`--logTopic`|The topic to which logs from this function are published|| |`--jar`|A path to the JAR file for the function (if the function is written in Java)|| -|`--name`|The name of the function|| +|`--name`|The name of the function|The value specified by --className| |`--namespace`|The function’s namespace|| |`--output`|The name of the topic to which the function publishes its output (if any)|| -|`--output-serde-classname`|The SerDe class used for the function’s output|| +|`--outputSerdeClassName`|The SerDe class used for the function’s output|| |`--parallelism`|The function’s parallelism factor, i.e. the number of instances of the function to run|1| -|`--processing-guarantees`|The processing guarantees applied to the function. Can be one of: ATLEAST_ONCE, ATMOST_ONCE, or EFFECTIVELY_ONCE|ATLEAST_ONCE| +|`--processingGuarantees`|The processing guarantees applied to the function. 
Can be one of: ATLEAST_ONCE, ATMOST_ONCE, or EFFECTIVELY_ONCE|ATLEAST_ONCE| |`--py`|The path of the Python file containing the function’s processing logic (if the function is written in Python)|| -|`--schema-type`|Schema Type to be used for storing output messages|| -|`--sliding-interval-count`|Number of messages after which the window ends|| -|`--sliding-interval-duration-ms`|The time duration after which the window slides|| -|`--state-storage-service-url`|The service URL for the function’s state storage (if the function uses a storage system different from the Apache BookKeeper cluster used by Pulsar)|| -|`--subscription-type`|The subscription type used by the function when consuming messages on the input topic(s). Can be either SHARED or EXCLUSIVE|SHARED| +|`--stateStorageServiceUrl`|The service URL for the function’s state storage (if the function uses a storage system different from the Apache BookKeeper cluster used by Pulsar)|| +|`--subscriptionType`|The subscription type used by the function when consuming messages on the input topic(s). Can be either SHARED or EXCLUSIVE|SHARED| |`--tenant`|The function’s tenant|| -|`--topics-pattern`|The topic pattern to consume from list of topics under a namespace that match the pattern|| -|`--user-config`|A user-supplied config value, set as a key/value pair. You can set multiple user config values.|| -|`--window-length-count`|The number of messages per window.|| -|`--window-length-duration-ms`|The time duration of the window in milliseconds.|| +|`--userConfig`|A user-supplied config value, set as a key/value pair. 
You can set multiple user config values.|| ### `create` @@ -353,31 +341,24 @@ Options |`--cpu`|The CPU to allocate to each function instance (in number of cores)|| |`--ram`|The RAM to allocate to each function instance (in bytes)|| |`--disk`|The disk space to allocate to each function instance (in bytes)|| -|`--auto-ack`|Let the functions framework manage acking|| -|`--subs-name`|Pulsar source subscription name if user wants a specific subscription-name for input-topic consumer|| -|`--classname`|The name of the function’s class|| -|`--custom-serde-inputs`|A map of the input topic to SerDe name|| -|`--custom-schema-inputs`|A map of the input topic to Schema class name|| -|`--function-config-file`|The path of the YAML config file used to configure the function|| +|`--brokerServiceUrl `|The URL of the Pulsar broker|| +|`--className`|The name of the function’s class|| +|`--customSerdeInputs`|A map of the input topic to SerDe name|| +|`--functionConfigFile`|The path of the YAML config file used to configure the function|| |`--inputs`|The input topics for the function (as a comma-separated list if more than one topic is desired)|| -|`--log-topic`|The topic to which logs from this function are published|| +|`--logTopic`|The topic to which logs from this function are published|| |`--jar`|A path to the JAR file for the function (if the function is written in Java)|| -|`--name`|The name of the function|| +|`--name`|The name of the function|The value specified by --className| |`--namespace`|The function’s namespace|| |`--output`|The name of the topic to which the function publishes its output (if any)|| -|`--output-serde-classname`|The SerDe class used for the function’s output|| +|`--outputSerdeClassName`|The SerDe class used for the function’s output|| |`--parallelism`|The function’s parallelism factor, i.e. the number of instances of the function to run|1| -|`--processing-guarantees`|The processing guarantees applied to the function. 
Can be one of: ATLEAST_ONCE, ATMOST_ONCE, or EFFECTIVELY_ONCE|ATLEAST_ONCE| +|`--processingGuarantees`|The processing guarantees applied to the function. Can be one of: ATLEAST_ONCE, ATMOST_ONCE, or EFFECTIVELY_ONCE|ATLEAST_ONCE| |`--py`|The path of the Python file containing the function’s processing logic (if the function is written in Python)|| -|`--schema-type`|Schema Type to be used for storing output messages|| -|`--sliding-interval-count`|Number of messages after which the window ends|| -|`--sliding-interval-duration-ms`|The time duration after which the window slides|| -|`--subscription-type`|The subscription type used by the function when consuming messages on the input topic(s). Can be either SHARED or EXCLUSIVE|SHARED| +|`--stateStorageServiceUrl`|The service URL for the function’s state storage (if the function uses a storage system different from the Apache BookKeeper cluster used by Pulsar)|| +|`--subscriptionType`|The subscription type used by the function when consuming messages on the input topic(s). Can be either SHARED or EXCLUSIVE|SHARED| |`--tenant`|The function’s tenant|| -|`--topics-pattern`|The topic pattern to consume from list of topics under a namespace that match the pattern|| -|`--user-config`|A user-supplied config value, set as a key/value pair. You can set multiple user config values.|| -|`--window-length-count`|The number of messages per window.|| -|`--window-length-duration-ms`|The time duration of the window in milliseconds.|| +|`--userConfig`|A user-supplied config value, set as a key/value pair. 
You can set multiple user config values.|| ### `delete` @@ -408,35 +389,22 @@ $ pulsar-admin functions update options Options |Flag|Description|Default| |---|---|---| -|`--cpu`|The CPU to allocate to each function instance (in number of cores)|| -|`--ram`|The RAM to allocate to each function instance (in bytes)|| -|`--disk`|The disk space to allocate to each function instance (in bytes)|| -|`--auto-ack`|Let the functions framework manage acking|| -|`--subs-name`|Pulsar source subscription name if user wants a specific subscription-name for input-topic consumer|| -|`--classname`|The name of the function’s class|| -|`--custom-serde-inputs`|A map of the input topic to SerDe name|| -|`--custom-schema-inputs`|A map of the input topic to Schema class name|| -|`--function-config-file`|The path of the YAML config file used to configure the function|| +|`--className`|The name of the function’s class|| +|`--customSerdeInputs`|A map of the input topic to SerDe name|| +|`--functionConfigFile`|The path of the YAML config file used to configure the function|| |`--inputs`|The input topics for the function (as a comma-separated list if more than one topic is desired)|| -|`--log-topic`|The topic to which logs from this function are published|| +|`--logTopic`|The topic to which logs from this function are published|| |`--jar`|A path to the JAR file for the function (if the function is written in Java)|| -|`--name`|The name of the function|| +|`--name`|The name of the function|The value specified by --className| |`--namespace`|The function’s namespace|| |`--output`|The name of the topic to which the function publishes its output (if any)|| -|`--output-serde-classname`|The SerDe class used for the function’s output|| +|`--outputSerdeClassName`|The SerDe class used for the function’s output|| |`--parallelism`|The function’s parallelism factor, i.e. the number of instances of the function to run|1| -|`--processing-guarantees`|The processing guarantees applied to the function. 
Can be one of: ATLEAST_ONCE, ATMOST_ONCE, or EFFECTIVELY_ONCE|ATLEAST_ONCE| +|`--processingGuarantees`|The processing guarantees applied to the function. Can be one of: ATLEAST_ONCE, ATMOST_ONCE, or EFFECTIVELY_ONCE|ATLEAST_ONCE| |`--py`|The path of the Python file containing the function’s processing logic (if the function is written in Python)|| -|`--schema-type`|Schema Type to be used for storing output messages|| -|`--sliding-interval-count`|Number of messages after which the window ends|| -|`--sliding-interval-duration-ms`|The time duration after which the window slides|| -|`--subscription-type`|The subscription type used by the function when consuming messages on the input topic(s). Can be either SHARED or EXCLUSIVE|SHARED| +|`--subscriptionType`|The subscription type used by the function when consuming messages on the input topic(s). Can be either SHARED or EXCLUSIVE|SHARED| |`--tenant`|The function’s tenant|| -|`--topics-pattern`|The topic pattern to consume from list of topics under a namespace that match the pattern|| -|`--user-config`|A user-supplied config value, set as a key/value pair. You can set multiple user config values.|| -|`--window-length-count`|The number of messages per window.|| -|`--window-length-duration-ms`|The time duration of the window in milliseconds.|| - +|`--userConfig`|A user-supplied config value, set as a key/value pair. 
You can set multiple user config values.|| ### `get` Fetch information about an existing Pulsar Function @@ -454,40 +422,6 @@ Options |`--tenant`|The tenant of the function|| -### `restart` -Restarts either all instances or one particular instance of a function - -Usage -```bash -$ pulsar-admin functions restart options -``` - -Options -|Flag|Description|Default| -|---|---|---| -|`--name`|The name of the function|| -|`--namespace`|The namespace of the function|| -|`--tenant`|The tenant of the function|| -|`--instance-id`|The function instanceId; restart all instances if instance-id is not provided|| - - -### `stop` -Temporary stops function instance. (If worker restarts then it reassigns and starts functiona again) - -Usage -```bash -$ pulsar-admin functions stop options -``` - -Options -|Flag|Description|Default| -|---|---|---| -|`--name`|The name of the function|| -|`--namespace`|The namespace of the function|| -|`--tenant`|The tenant of the function|| -|`--instance-id`|The function instanceId; stop all instances if instance-id is not provided|| - - ### `getstatus` Get the status of an existing Pulsar Function @@ -502,7 +436,7 @@ Options |`--name`|The name of the function|| |`--namespace`|The namespace of the function|| |`--tenant`|The tenant of the function|| -|`--instance-id`|The function instanceId; get status of all instances if instance-id is not provided|| + ### `list` List all Pulsar Functions for a specific tenant and namespace @@ -552,8 +486,8 @@ Options |`--name`|The name of the Pulsar Function to trigger|| |`--namespace`|The namespace of the Pulsar Function to trigger|| |`--tenant`|The tenant of the Pulsar Function to trigger|| -|`--trigger-file`|The path to the file containing the data with which the Pulsar Function is to be triggered|| -|`--trigger-value`|The value with which the Pulsar Function is to be triggered|| +|`--triggerFile`|The path to the file containing the data with which the Pulsar Function is to be triggered|| +|`--triggerValue`|The 
value with which the Pulsar Function is to be triggered|| ## `namespaces` @@ -981,10 +915,8 @@ $ pulsar-admin sink subcommand Subcommands * `create` -* `update` * `delete` * `localrun` -* `available-sinks` ### `create` @@ -998,52 +930,19 @@ $ pulsar-admin sink create options Options |Flag|Description|Default| |----|---|---| -|`--classname`|The sink’s Java class name|| -|`--cpu`|The CPU (in cores) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--custom-serde-inputs`|The map of input topics to SerDe class names (as a JSON string)|| -|`--custom-schema-inputs`|The map of input topics to Schema types or class names (as a JSON string)|| -|`--disk`|The disk (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--inputs`|The sink’s input topic(s) (multiple topics can be specified as a comma-separated list)|| -|`--archive`|Path to the archive file for the sink|| -|`--name`|The sink’s name|| -|`--namespace`|The sink’s namespace|| -|`--parallelism`|“The sink’s parallelism factor (i.e. the number of sink instances to run).”|| -|`--processing-guarantees`|“The processing guarantees (aka delivery semantics) applied to the sink. 
Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| -|`--ram`|The RAM (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--sink-config`|Sink config key/values|| -|`--sink-config-file`|The path to a YAML config file specifying the sink’s configuration|| -|`--sink-type`|The built-in sinks's connector provider|| -|`--topics-pattern`|TopicsPattern to consume from list of topics under a namespace that match the pattern.|| -|`--tenant`|The sink’s tenant|| - - -### `update` -Submit a Pulsar IO sink connector to run in a Pulsar cluster - -Usage -```bash -$ pulsar-admin sink update options -``` - -Options -|Flag|Description|Default| -|----|---|---| -|`--classname`|The sink’s Java class name|| +|`--className`|The sink’s Java class name|| |`--cpu`|The CPU (in cores) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--custom-serde-inputs`|The map of input topics to SerDe class names (as a JSON string)|| -|`--custom-schema-inputs`|The map of input topics to Schema types or class names (as a JSON string)|| +|`--customSerdeInputs`|The map of input topics to SerDe class names (as a JSON string)|| |`--disk`|The disk (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| |`--inputs`|The sink’s input topic(s) (multiple topics can be specified as a comma-separated list)|| -|`--archive`|Path to the archive file for the sink|| +|`--jar`|Path to the Java jar file for the sink|| |`--name`|The sink’s name|| |`--namespace`|The sink’s namespace|| |`--parallelism`|“The sink’s parallelism factor (i.e. the number of sink instances to run).”|| -|`--processing-guarantees`|“The processing guarantees (aka delivery semantics) applied to the sink. Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| +|`--processingGuarantees`|“The processing guarantees (aka delivery semantics) applied to the sink. 
Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| |`--ram`|The RAM (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--sink-config`|Sink config key/values|| -|`--sink-config-file`|The path to a YAML config file specifying the sink’s configuration|| -|`--sink-type`|The built-in sinks's connector provider|| -|`--topics-pattern`|TopicsPattern to consume from list of topics under a namespace that match the pattern.|| +|`--sinkConfig`|Sink config key/values|| +|`--sinkConfigFile`|The path to a YAML config file specifying the sink’s configuration|| |`--tenant`|The sink’s tenant|| @@ -1074,34 +973,23 @@ $ pulsar-admin sink localrun options Options |Flag|Description|Default| |----|---|---| -|`--broker-service-url`|The URL for the Pulsar broker|| -|`--classname`|The sink’s Java class name|| +|`--brokerServiceUrl`|The URL for the Pulsar broker|| +|`--className`|The sink’s Java class name|| |`--cpu`|The CPU (in cores) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--custom-serde-inputs`|The map of input topics to SerDe class names (as a JSON string)|| -|`--custom-schema-inputs`|The map of input topics to Schema types or class names (as a JSON string)|| +|`--customSerdeInputs`|The map of input topics to SerDe class names (as a JSON string)|| |`--disk`|The disk (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| |`--inputs`|The sink’s input topic(s) (multiple topics can be specified as a comma-separated list)|| -|`--archive`|Path to the archive file for the sink|| +|`--jar`|Path to the Java jar file for the sink|| |`--name`|The sink’s name|| |`--namespace`|The sink’s namespace|| |`--parallelism`|“The sink’s parallelism factor (i.e. the number of sink instances to run).”|| -|`--processing-guarantees`|“The processing guarantees (aka delivery semantics) applied to the sink. 
Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| +|`--processingGuarantees`|“The processing guarantees (aka delivery semantics) applied to the sink. Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| |`--ram`|The RAM (in bytes) that needs to be allocated per sink instance (applicable only to the Docker runtime)|| -|`--sink-config`|Sink config key/values|| -|`--sink-config-file`|The path to a YAML config file specifying the sink’s configuration|| -|`--sink-type`|The built-in sinks's connector provider|| -|`--topics-pattern`|TopicsPattern to consume from list of topics under a namespace that match the pattern.|| +|`--sinkConfig`|Sink config key/values|| +|`--sinkConfigFile`|The path to a YAML config file specifying the sink’s configuration|| |`--tenant`|The sink’s tenant|| -### `available-sinks` -Get a list of all built-in sink connectors - -Usage -```bash -$ pulsar-admin sink available-sinks -``` - ## `source` An interface for managing Pulsar IO sources (ingress data into Pulsar) @@ -1113,10 +1001,8 @@ $ pulsar-admin source subcommand Subcommands * `create` -* `update` * `delete` * `localrun` -* `available-sources` ### `create` @@ -1130,50 +1016,19 @@ $ pulsar-admin source create options Options |Flag|Description|Default| |----|---|---| -|`--classname`|The source’s Java class name|| -|`--cpu`|The CPU (in cores) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--deserialization-classname`|The SerDe classname for the source|| -|`--destination-topic-name`|The Pulsar topic to which data is sent|| -|`--disk`|The disk (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--archive`|The path to the NAR archive for the Source|| -|`--name`|The source’s name|| -|`--namespace`|The source’s namespace|| -|`--parallelism`|The source’s parallelism factor (i.e. 
the number of source instances to run).|| -|`--processing-guarantees`|“The processing guarantees (aka delivery semantics) applied to the source. Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| -|`--ram`|The RAM (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--schema-type`|The schema type (either a builtin schema like 'avro', 'json', etc, or custom Schema class name to be used to encode messages emitted from the source|| -|`--source-type`|One of the built-in source's connector provider|| -|`--source-config`|Source config key/values|| -|`--source-config-file`|The path to a YAML config file specifying the source’s configuration|| -|`--tenant`|The source’s tenant|| - - -### `update` -Update a already submitted Pulsar IO source connector - -Usage -```bash -$ pulsar-admin source update options -``` - -Options -|Flag|Description|Default| -|----|---|---| -|`--classname`|The source’s Java class name|| +|`--className`|The source’s Java class name|| |`--cpu`|The CPU (in cores) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--deserialization-classname`|The SerDe classname for the source|| -|`--destination-topic-name`|The Pulsar topic to which data is sent|| +|`--deserializationClassName`|The SerDe classname for the source|| +|`--destinationTopicName`|The Pulsar topic to which data is sent|| |`--disk`|The disk (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--archive`|The path to the NAR archive for the Source|| +|`--jar`|Path to the Java jar file for the source|| |`--name`|The source’s name|| |`--namespace`|The source’s namespace|| |`--parallelism`|The source’s parallelism factor (i.e. the number of source instances to run).|| -|`--processing-guarantees`|“The processing guarantees (aka delivery semantics) applied to the source. 
Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| +|`--processingGuarantees`|“The processing guarantees (aka delivery semantics) applied to the source. Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| |`--ram`|The RAM (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--schema-type`|The schema type (either a builtin schema like 'avro', 'json', etc, or custom Schema class name to be used to encode messages emitted from the source|| -|`--source-type`|One of the built-in source's connector provider|| -|`--source-config`|Source config key/values|| -|`--source-config-file`|The path to a YAML config file specifying the source’s configuration|| +|`--sourceConfig`|Source config key/values|| +|`--sourceConfigFile`|The path to a YAML config file specifying the source’s configuration|| |`--tenant`|The source’s tenant|| @@ -1204,32 +1059,22 @@ $ pulsar-admin source localrun options Options |Flag|Description|Default| |----|---|---| -|`--classname`|The source’s Java class name|| +|`--className`|The source’s Java class name|| |`--cpu`|The CPU (in cores) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--deserialization-classname`|The SerDe classname for the source|| -|`--destination-topic-name`|The Pulsar topic to which data is sent|| +|`--deserializationClassName`|The SerDe classname for the source|| +|`--destinationTopicName`|The Pulsar topic to which data is sent|| |`--disk`|The disk (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--archive`|The path to the NAR archive for the Source|| +|`--jar`|Path to the Java jar file for the source|| |`--name`|The source’s name|| |`--namespace`|The source’s namespace|| |`--parallelism`|The source’s parallelism factor (i.e. 
the number of source instances to run).|| -|`--processing-guarantees`|“The processing guarantees (aka delivery semantics) applied to the source. Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| +|`--processingGuarantees`|“The processing guarantees (aka delivery semantics) applied to the source. Available values: ATLEAST_ONCE, ATMOST_ONCE, EFFECTIVELY_ONCE.”|| |`--ram`|The RAM (in bytes) that needs to be allocated per source instance (applicable only to the Docker runtime)|| -|`--schema-type`|The schema type (either a builtin schema like 'avro', 'json', etc, or custom Schema class name to be used to encode messages emitted from the source|| -|`--source-type`|One of the built-in source's connector provider|| -|`--source-config`|Source config key/values|| -|`--source-config-file`|The path to a YAML config file specifying the source’s configuration|| +|`--sourceConfig`|Source config key/values|| +|`--sourceConfigFile`|The path to a YAML config file specifying the source’s configuration|| |`--tenant`|The source’s tenant|| -### `available-sources` -Get a list of all built-in source connectors - -Usage -```bash -$ pulsar-admin source available-sources -``` - ## `topics` Operations for managing Pulsar topics (both persistent and non persistent) diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/sql-deployment-configurations.md b/site2/website/versioned_docs/version-2.1.1-incubating/sql-deployment-configurations.md deleted file mode 100644 index 536e613d74..0000000000 --- a/site2/website/versioned_docs/version-2.1.1-incubating/sql-deployment-configurations.md +++ /dev/null @@ -1,151 +0,0 @@ ---- -id: version-2.1.1-incubating-sql-deployment-configurations -title: Pulsar SQl Deployment and Configuration -sidebar_label: Deployment and Configuration -original_id: sql-deployment-configurations ---- - -## Presto Pulsar Connector Configurations -There are several configurations for the Presto Pulsar Connector. 
The properties file that contains these configurations can be found at ```${project.root}/conf/presto/catalog/pulsar.properties```.
-The configurations for the connector and its default values are described below.
-
-```properties
-# name of the connector to be displayed in the catalog
-connector.name=pulsar
-
-# the url of Pulsar broker service
-pulsar.broker-service-url=http://localhost:8080
-
-# URI of Zookeeper cluster
-pulsar.zookeeper-uri=localhost:2181
-
-# minimum number of entries to read at a single time
-pulsar.entry-read-batch-size=100
-
-# default number of splits to use per query
-pulsar.target-num-splits=4
-```
-
-## Query Pulsar from Existing Presto Cluster
-
-If you already have an existing Presto cluster, you can copy the Presto Pulsar connector plugin to your existing cluster. You can download the archived plugin package via:
-
-```bash
-$ wget pulsar:binary_release_url
-```
-
-## Deploying a new cluster
-
-Please note that the [Getting Started](sql-getting-started.md) guide shows you how to easily set up a standalone single node environment to experiment with.
-
-Pulsar SQL is powered by [Presto](https://prestodb.io) thus many of the configurations for deployment are the same for the Pulsar SQL worker. 
- -You can use the same CLI args as the Presto launcher: - -```bash -$ ./bin/pulsar sql-worker --help -Usage: launcher [options] command - -Commands: run, start, stop, restart, kill, status - -Options: - -h, --help show this help message and exit - -v, --verbose Run verbosely - --etc-dir=DIR Defaults to INSTALL_PATH/etc - --launcher-config=FILE - Defaults to INSTALL_PATH/bin/launcher.properties - --node-config=FILE Defaults to ETC_DIR/node.properties - --jvm-config=FILE Defaults to ETC_DIR/jvm.config - --config=FILE Defaults to ETC_DIR/config.properties - --log-levels-file=FILE - Defaults to ETC_DIR/log.properties - --data-dir=DIR Defaults to INSTALL_PATH - --pid-file=FILE Defaults to DATA_DIR/var/run/launcher.pid - --launcher-log-file=FILE - Defaults to DATA_DIR/var/log/launcher.log (only in - daemon mode) - --server-log-file=FILE - Defaults to DATA_DIR/var/log/server.log (only in - daemon mode) - -D NAME=VALUE Set a Java system property - -``` - -There is a set of default configs for the cluster located in ```${project.root}/conf/presto``` that will be used by default. You can change them to customize your deployment - -You can also set the worker to read from a different configuration directory as well as set a different directory for writing its data: - -```bash -$ ./bin/pulsar sql-worker run --etc-dir /tmp/incubator-pulsar/conf/presto --data-dir /tmp/presto-1 -``` - -You can also start the worker as daemon process: - -```bash -$ ./bin sql-worker start -``` - -### Deploying to a 3 node cluster - -For example, if I wanted to deploy a Pulsar SQL/Presto cluster on 3 nodes, you can do the following: - -First, copy the Pulsar binary distribution to all three nodes. - -The first node, will run the Presto coordinator. 
The minimal configuration in ```${project.root}/conf/presto/config.properties``` can be the following:
-
-```properties
-coordinator=true
-node-scheduler.include-coordinator=true
-http-server.http.port=8080
-query.max-memory=50GB
-query.max-memory-per-node=1GB
-discovery-server.enabled=true
-discovery.uri=<coordinator-url>
-```
-
-Also, modify ```pulsar.broker-service-url``` and ```pulsar.zookeeper-uri``` configs in ```${project.root}/conf/presto/catalog/pulsar.properties``` on those nodes accordingly
-
-Afterwards, you can start the coordinator by just running
-
-```$ ./bin/pulsar sql-worker run```
-
-For the other two nodes that will only serve as worker nodes, the configurations can be the following:
-
-```properties
-coordinator=false
-http-server.http.port=8080
-query.max-memory=50GB
-query.max-memory-per-node=1GB
-discovery.uri=<coordinator-url>
-
-```
-
-Also, modify ```pulsar.broker-service-url``` and ```pulsar.zookeeper-uri``` configs in ```${project.root}/conf/presto/catalog/pulsar.properties``` accordingly
-
-You can also start the worker by just running:
-
-```$ ./bin/pulsar sql-worker run```
-
-You can check the status of your cluster from the SQL CLI. 
To start the SQL CLI: - -```bash -$ ./bin/pulsar sql --server <coordinate_url> - -``` - -You can then run the following command to check the status of your nodes: - -```bash -presto> SELECT * FROM system.runtime.nodes; - node_id | http_uri | node_version | coordinator | state ----------+-------------------------+--------------+-------------+-------- - 1 | http://192.168.2.1:8081 | testversion | true | active - 3 | http://192.168.2.2:8081 | testversion | false | active - 2 | http://192.168.2.3:8081 | testversion | false | active -``` - - -For more information about deployment in Presto, please reference: - -[Deploying Presto](https://prestodb.io/docs/current/installation/deployment.html) - diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/sql-getting-started.md b/site2/website/versioned_docs/version-2.1.1-incubating/sql-getting-started.md deleted file mode 100644 index 3fb42a37c4..0000000000 --- a/site2/website/versioned_docs/version-2.1.1-incubating/sql-getting-started.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -id: version-2.1.1-incubating-sql-getting-started -title: Pulsar SQL Getting Started -sidebar_label: Getting Started -original_id: sql-getting-started ---- - -It is super easy to start query data in Pulsar. - -## Requirements -1. **Pulsar distribution** - * If you haven't install Pulsar, please reference [Installing Pulsar](io-quickstart.md#installing-pulsar) -2. 
**Pulsar built-in connectors** - * If you haven't installed the built-in connectors, please reference [Installing Builtin Connectors](io-quickstart.md#installing-builtin-connectors) - -First, start a Pulsar standalone cluster: - -```bash -./bin/pulsar standalone -``` - -Next, start a Pulsar SQL worker: -```bash -./bin/pulsar sql-worker run -``` - -After both the Pulsar standalone cluster and the SQL worker are done initializing, run the SQL CLI: -```bash -./bin/pulsar sql -``` - -You can now start typing some SQL commands: - - -```bash -presto> show catalogs; - Catalog ---------- - pulsar - system -(2 rows) - -Query 20180829_211752_00004_7qpwh, FINISHED, 1 node -Splits: 19 total, 19 done (100.00%) -0:00 [0 rows, 0B] [0 rows/s, 0B/s] - - -presto> show schemas in pulsar; - Schema ------------------------ - information_schema - public/default - public/functions - sample/standalone/ns1 -(4 rows) - -Query 20180829_211818_00005_7qpwh, FINISHED, 1 node -Splits: 19 total, 19 done (100.00%) -0:00 [4 rows, 89B] [21 rows/s, 471B/s] - - -presto> show tables in pulsar."public/default"; - Table -------- -(0 rows) - -Query 20180829_211839_00006_7qpwh, FINISHED, 1 node -Splits: 19 total, 19 done (100.00%) -0:00 [0 rows, 0B] [0 rows/s, 0B/s] - -``` - -Currently, there is no data in Pulsar that we can query. 
Lets start the built-in connector _DataGeneratorSource_ to ingest some mock data for us to query: - -```bash -./bin/pulsar-admin source create --tenant test-tenant --namespace test-namespace --name generator --destinationTopicName generator_test --source-type data-generator -``` - -Afterwards, the will be a topic with can query in the namespace "public/default": - -```bash -presto> show tables in pulsar."public/default"; - Table ----------------- - generator_test -(1 row) - -Query 20180829_213202_00000_csyeu, FINISHED, 1 node -Splits: 19 total, 19 done (100.00%) -0:02 [1 rows, 38B] [0 rows/s, 17B/s] -``` - -We can now query the data within the topic "generator_test": - -```bash -presto> select * from pulsar."public/default".generator_test; - - firstname | middlename | lastname | email | username | password | telephonenumber | age | companyemail | nationalidentitycardnumber | --------------+-------------+-------------+----------------------------------+--------------+----------+-----------------+-----+-----------------------------------------------+----------------------------+ - Genesis | Katherine | Wiley | genesis.wi...@gmail.com | genesisw | y9D2dtU3 | 959-197-1860 | 71 | genesis.wi...@interdemconsulting.eu | 880-58-9247 | - Brayden | | Stanton | brayden.stan...@yahoo.com | braydens | ZnjmhXik | 220-027-867 | 81 | brayden.stan...@supermemo.eu | 604-60-7069 | - Benjamin | Julian | Velasquez | benjamin.velasq...@yahoo.com | benjaminv | 8Bc7m3eb | 298-377-0062 | 21 | benjamin.velasq...@hostesltd.biz | 213-32-5882 | - Michael | Thomas | Donovan | dono...@mail.com | michaeld | OqBm9MLs | 078-134-4685 | 55 | michael.dono...@memortech.eu | 443-30-3442 | - Brooklyn | Avery | Roach | brooklynro...@yahoo.com | broach | IxtBLafO | 387-786-2998 | 68 | brooklyn.ro...@warst.biz | 085-88-3973 | - Skylar | | Bradshaw | skylarbrads...@yahoo.com | skylarb | p6eC6cKy | 210-872-608 | 96 | skylar.brads...@flyhigh.eu | 453-46-0334 | -. -. -. 
-``` - -Now, you have some mock data to query and play around with! - -If you want to try to ingest some of your own data to play around with, you can write a simple producer to write custom defined data to Pulsar. - -For example: - -```java -public class Test { - - public static class Foo { - private int field1 = 1; - private String field2; - private long field3; - } - - public static void main(String[] args) throws Exception { - PulsarClient pulsarClient = PulsarClient.builder().serviceUrl("pulsar://localhost:6650").build(); - Producer<Foo> producer = pulsarClient.newProducer(AvroSchema.of(Foo.class)).topic("test_topic").create(); - - for (int i = 0; i < 1000; i++) { - Foo foo = new Foo(); - foo.setField1(i); - foo.setField2("foo" + i); - foo.setField3(System.currentTimeMillis()); - producer.newMessage().value(foo).send(); - } - producer.close(); - pulsarClient.close(); - } -} -``` - -Afterwards, you should be able query the data you just wrote. diff --git a/site2/website/versioned_docs/version-2.1.1-incubating/sql-overview.md b/site2/website/versioned_docs/version-2.1.1-incubating/sql-overview.md deleted file mode 100644 index 494fa9db1f..0000000000 --- a/site2/website/versioned_docs/version-2.1.1-incubating/sql-overview.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -id: version-2.1.1-incubating-sql-overview -title: Pulsar SQL Overview -sidebar_label: Overview -original_id: sql-overview ---- - -One of the common use cases of Pulsar is storing streams of event data. Often the event data is structured which predefined fields. There is tremendous value for users to be able to query the existing data that is already stored in Pulsar topics. With the implementation of the [Schema Registry](concepts-schema-registry.md), structured data can be stored in Pulsar and allows for the potential to query that data via SQL language. 
- 
-By leveraging [Presto](https://prestodb.io/), we have created a method for users to be able to query structured data stored within Pulsar in a very efficient and scalable manner. We will discuss why this is very efficient and scalable in the [Performance](#performance) section below.
-
-At the core of Pulsar SQL is the Presto Pulsar connector which allows Presto workers within a Presto cluster to query data from Pulsar.
-
-
-![The Pulsar consumer and reader interfaces](assets/pulsar-sql-arch-2.png)
-
-
-## Performance
-
-The reason why query performance is very efficient and highly scalable is because of Pulsar's [two level segment based architecture](concepts-architecture-overview.md#apache-bookkeeper).
-
-Topics in Pulsar are stored as segments in [Apache Bookkeeper](https://bookkeeper.apache.org/). Each topic segment is also replicated to a configurable (default 3) number of Bookkeeper nodes which allows for concurrent reads and high read throughput. In the Presto Pulsar connector, we read data directly from Bookkeeper to take advantage of Pulsar's segment based architecture. Thus, Presto workers can read concurrently from a horizontally scalable number of Bookkeeper nodes. 
- - -![The Pulsar consumer and reader interfaces](assets/pulsar-sql-arch-1.png) diff --git a/site2/website/versioned_sidebars/version-2.1.1-incubating-sidebars.json b/site2/website/versioned_sidebars/version-2.1.1-incubating-sidebars.json new file mode 100644 index 0000000000..070d23df6e --- /dev/null +++ b/site2/website/versioned_sidebars/version-2.1.1-incubating-sidebars.json @@ -0,0 +1,116 @@ +{ + "version-2.1.1-incubating-docs": { + "Getting started": [ + "version-2.1.1-incubating-pulsar-2.0", + "version-2.1.1-incubating-standalone", + "version-2.1.1-incubating-standalone-docker", + "version-2.1.1-incubating-client-libraries" + ], + "Concepts and Architecture": [ + "version-2.1.1-incubating-concepts-overview", + "version-2.1.1-incubating-concepts-messaging", + "version-2.1.1-incubating-concepts-architecture-overview", + "version-2.1.1-incubating-concepts-clients", + "version-2.1.1-incubating-concepts-replication", + "version-2.1.1-incubating-concepts-multi-tenancy", + "version-2.1.1-incubating-concepts-authentication", + "version-2.1.1-incubating-concepts-topic-compaction", + "version-2.1.1-incubating-concepts-tiered-storage", + "version-2.1.1-incubating-concepts-schema-registry" + ], + "Pulsar Functions": [ + "version-2.1.1-incubating-functions-overview", + "version-2.1.1-incubating-functions-quickstart", + "version-2.1.1-incubating-functions-api", + "version-2.1.1-incubating-functions-deploying", + "version-2.1.1-incubating-functions-guarantees", + "version-2.1.1-incubating-functions-state", + "version-2.1.1-incubating-functions-metrics" + ], + "Pulsar IO": [ + "version-2.1.1-incubating-io-overview", + "version-2.1.1-incubating-io-quickstart", + "version-2.1.1-incubating-io-managing", + "version-2.1.1-incubating-io-connectors", + "version-2.1.1-incubating-io-develop" + ], + "Deployment": [ + "version-2.1.1-incubating-deploy-aws", + "version-2.1.1-incubating-deploy-kubernetes", + "version-2.1.1-incubating-deploy-bare-metal", + 
"version-2.1.1-incubating-deploy-bare-metal-multi-cluster", + "version-2.1.1-incubating-deploy-dcos", + "version-2.1.1-incubating-deploy-monitoring" + ], + "Administration": [ + "version-2.1.1-incubating-administration-zk-bk", + "version-2.1.1-incubating-administration-geo", + "version-2.1.1-incubating-administration-dashboard", + "version-2.1.1-incubating-administration-stats", + "version-2.1.1-incubating-administration-load-distribution", + "version-2.1.1-incubating-administration-proxy" + ], + "Security": [ + "version-2.1.1-incubating-security-overview", + "version-2.1.1-incubating-security-tls-transport", + "version-2.1.1-incubating-security-tls-authentication", + "version-2.1.1-incubating-security-athenz", + "version-2.1.1-incubating-security-authorization", + "version-2.1.1-incubating-security-encryption", + "version-2.1.1-incubating-security-extending" + ], + "Client libraries": [ + "version-2.1.1-incubating-client-libraries-java", + "version-2.1.1-incubating-client-libraries-go", + "version-2.1.1-incubating-client-libraries-python", + "version-2.1.1-incubating-client-libraries-cpp", + "version-2.1.1-incubating-client-libraries-websocket" + ], + "Admin API": [ + "version-2.1.1-incubating-admin-api-overview", + "version-2.1.1-incubating-admin-api-clusters", + "version-2.1.1-incubating-admin-api-tenants", + "version-2.1.1-incubating-admin-api-brokers", + "version-2.1.1-incubating-admin-api-namespaces", + "version-2.1.1-incubating-admin-api-permissions", + "version-2.1.1-incubating-admin-api-persistent-topics", + "version-2.1.1-incubating-admin-api-non-persistent-topics", + "version-2.1.1-incubating-admin-api-partitioned-topics", + "version-2.1.1-incubating-admin-api-schemas" + ], + "Adaptors": [ + "version-2.1.1-incubating-adaptors-kafka", + "version-2.1.1-incubating-adaptors-spark", + "version-2.1.1-incubating-adaptors-storm" + ], + "Cookbooks": [ + "version-2.1.1-incubating-cookbooks-tiered-storage", + "version-2.1.1-incubating-cookbooks-compaction", + 
"version-2.1.1-incubating-cookbooks-deduplication", + "version-2.1.1-incubating-cookbooks-non-persistent", + "version-2.1.1-incubating-cookbooks-partitioned", + "version-2.1.1-incubating-cookbooks-retention-expiry", + "version-2.1.1-incubating-cookbooks-encryption", + "version-2.1.1-incubating-cookbooks-message-queue" + ], + "Development": [ + "version-2.1.1-incubating-develop-tools", + "version-2.1.1-incubating-develop-binary-protocol", + "version-2.1.1-incubating-develop-schema", + "version-2.1.1-incubating-develop-load-manager", + "version-2.1.1-incubating-develop-cpp" + ], + "Reference": [ + "version-2.1.1-incubating-reference-terminology", + "version-2.1.1-incubating-reference-cli-tools", + "version-2.1.1-incubating-pulsar-admin", + "version-2.1.1-incubating-reference-configuration" + ] + }, + "version-2.1.1-incubating-docs-other": { + "First Category": [ + "version-2.1.1-incubating-doc4", + "version-2.1.1-incubating-doc5" + ] + } +} ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services