This is an automated email from the ASF dual-hosted git repository.
sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pulsar.git
The following commit(s) were added to refs/heads/master by this push:
new f7c5696 adding pulsar io docs (#2001)
f7c5696 is described below
commit f7c569614d7d1cdccd306ff5aad3ed7c21e53f2a
Author: Boyang Jerry Peng <[email protected]>
AuthorDate: Mon Jun 25 10:29:21 2018 -0700
adding pulsar io docs (#2001)
---
.../java/org/apache/pulsar/admin/cli/CmdSinks.java | 19 ++-
.../org/apache/pulsar/admin/cli/CmdSources.java | 27 ++--
site/_data/cli/pulsar-admin.yaml | 158 +++++++++++++++++++++
site/_data/connectors.yaml | 49 +++++++
site/_data/sidebar.yaml | 2 +
site/_includes/{figure.html => connectors.html} | 25 +++-
site/_includes/figure.html | 9 +-
site/_sass/_docs.scss | 18 +++
site/docs/latest/cookbooks/pulsar-io.md | 82 +++++++++++
.../getting-started/ConceptsAndArchitecture.md | 28 ++++
site/img/pulsar-io.png | Bin 0 -> 37316 bytes
11 files changed, 389 insertions(+), 28 deletions(-)
diff --git
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
index b5993e3..9d4595b 100644
---
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
+++
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
@@ -64,7 +64,7 @@ import static
org.apache.pulsar.functions.utils.Utils.getSinkType;
import static org.apache.pulsar.functions.worker.Utils.downloadFromHttpUrl;
@Getter
-@Parameters(commandDescription = "Interface for managing Pulsar Sinks (Egress
data from Pulsar)")
+@Parameters(commandDescription = "Interface for managing Pulsar IO sinks
(egress data from Pulsar)")
public class CmdSinks extends CmdBase {
private final CreateSink createSink;
@@ -102,7 +102,7 @@ public class CmdSinks extends CmdBase {
abstract void runCmd() throws Exception;
}
- @Parameters(commandDescription = "Run the Pulsar sink locally (rather than
deploying it to the Pulsar cluster)")
+ @Parameters(commandDescription = "Run a Pulsar IO sink connector locally
(rather than deploying it to the Pulsar cluster)")
class LocalSinkRunner extends CreateSink {
@Parameter(names = "--brokerServiceUrl", description = "The URL for
the Pulsar broker")
@@ -139,7 +139,7 @@ public class CmdSinks extends CmdBase {
}
}
- @Parameters(commandDescription = "Create Pulsar sink connectors")
+ @Parameters(commandDescription = "Submit a Pulsar IO sink connector to run
in a Pulsar cluster")
class CreateSink extends SinkCommand {
@Override
void runCmd() throws Exception {
@@ -152,7 +152,7 @@ public class CmdSinks extends CmdBase {
}
}
- @Parameters(commandDescription = "Update Pulsar sink connectors")
+ @Parameters(commandDescription = "Update a Pulsar IO sink connector")
class UpdateSink extends SinkCommand {
@Override
void runCmd() throws Exception {
@@ -165,7 +165,6 @@ public class CmdSinks extends CmdBase {
}
}
- @Parameters(commandDescription = "Create Pulsar sink connectors")
abstract class SinkCommand extends BaseCommand {
@Parameter(names = "--tenant", description = "The sink's tenant")
protected String tenant;
@@ -181,7 +180,7 @@ public class CmdSinks extends CmdBase {
protected String topicsPattern;
@Parameter(names = "--customSerdeInputs", description = "The map of
input topics to SerDe class names (as a JSON string)")
protected String customSerdeInputString;
- @Parameter(names = "--processingGuarantees", description = "The
processing guarantees (aka delivery semantics) applied to the Sink")
+ @Parameter(names = "--processingGuarantees", description = "The
processing guarantees (aka delivery semantics) applied to the sink")
protected FunctionConfig.ProcessingGuarantees processingGuarantees;
@Parameter(names = "--parallelism", description = "The sink's
parallelism factor (i.e. the number of sink instances to run)")
protected Integer parallelism;
@@ -191,11 +190,11 @@ public class CmdSinks extends CmdBase {
@Parameter(names = "--sinkConfigFile", description = "The path to a
YAML config file specifying the "
+ "sink's configuration")
protected String sinkConfigFile;
- @Parameter(names = "--cpu", description = "The cpu in cores that need
to be allocated per function instance(applicable only to docker runtime)")
+ @Parameter(names = "--cpu", description = "The CPU (in cores) that
needs to be allocated per sink instance (applicable only to Docker runtime)")
protected Double cpu;
- @Parameter(names = "--ram", description = "The ram in bytes that need
to be allocated per function instance(applicable only to process/docker
runtime)")
+ @Parameter(names = "--ram", description = "The RAM (in bytes) that
need to be allocated per sink instance (applicable only to the process and
Docker runtimes)")
protected Long ram;
- @Parameter(names = "--disk", description = "The disk in bytes that
need to be allocated per function instance(applicable only to docker runtime)")
+ @Parameter(names = "--disk", description = "The disk (in bytes) that
need to be allocated per sink instance (applicable only to Docker runtime)")
protected Long disk;
@Parameter(names = "--sinkConfig", description = "Sink config
key/values")
protected String sinkConfigString;
@@ -411,7 +410,7 @@ public class CmdSinks extends CmdBase {
}
}
- @Parameters(commandDescription = "Stops a Pulsar sink or source")
+ @Parameters(commandDescription = "Stops a Pulsar IO sink connector")
class DeleteSink extends BaseCommand {
@Parameter(names = "--tenant", description = "The tenant of the sink")
diff --git
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
index b3e49fd..2ff338d 100644
---
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
+++
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
@@ -42,7 +42,6 @@ import org.apache.pulsar.functions.utils.SourceConfig;
import org.apache.pulsar.functions.utils.Utils;
import org.apache.pulsar.functions.utils.validation.ConfigValidation;
import
org.apache.pulsar.functions.utils.validation.ValidatorImpls.ImplementsClassValidator;
-import org.apache.pulsar.io.core.Sink;
import org.apache.pulsar.io.core.Source;
import java.io.File;
@@ -52,18 +51,15 @@ import java.lang.reflect.Type;
import java.net.MalformedURLException;
import java.util.Map;
-import static org.apache.commons.lang3.StringUtils.isBlank;
-import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.pulsar.common.naming.TopicName.DEFAULT_NAMESPACE;
import static org.apache.pulsar.common.naming.TopicName.PUBLIC_TENANT;
import static
org.apache.pulsar.functions.utils.Utils.convertProcessingGuarantee;
import static org.apache.pulsar.functions.utils.Utils.fileExists;
-import static org.apache.pulsar.functions.utils.Utils.getSinkType;
import static org.apache.pulsar.functions.utils.Utils.getSourceType;
import static org.apache.pulsar.functions.worker.Utils.downloadFromHttpUrl;
@Getter
-@Parameters(commandDescription = "Interface for managing Pulsar Source
(Ingress data to Pulsar)")
+@Parameters(commandDescription = "Interface for managing Pulsar IO Sources
(ingress data into Pulsar)")
public class CmdSources extends CmdBase {
private final CreateSource createSource;
@@ -101,7 +97,7 @@ public class CmdSources extends CmdBase {
abstract void runCmd() throws Exception;
}
- @Parameters(commandDescription = "Run the Pulsar source locally (rather
than deploying it to the Pulsar cluster)")
+ @Parameters(commandDescription = "Run a Pulsar IO source connector locally
(rather than deploying it to the Pulsar cluster)")
class LocalSourceRunner extends CreateSource {
@Parameter(names = "--brokerServiceUrl", description = "The URL for
the Pulsar broker")
@@ -138,7 +134,7 @@ public class CmdSources extends CmdBase {
}
}
- @Parameters(commandDescription = "Create Pulsar source connectors")
+ @Parameters(commandDescription = "Submit a Pulsar IO source connector to
run in a Pulsar cluster")
public class CreateSource extends SourceCommand {
@Override
void runCmd() throws Exception {
@@ -151,7 +147,7 @@ public class CmdSources extends CmdBase {
}
}
- @Parameters(commandDescription = "Update Pulsar source connectors")
+ @Parameters(commandDescription = "Update a Pulsar IO source connector")
public class UpdateSource extends SourceCommand {
@Override
void runCmd() throws Exception {
@@ -175,23 +171,22 @@ public class CmdSources extends CmdBase {
protected FunctionConfig.ProcessingGuarantees processingGuarantees;
@Parameter(names = "--className", description = "The source's class
name")
protected String className;
- @Parameter(names = "--destinationTopicName", description = "Pulsar
topic to ingress data to")
+ @Parameter(names = "--destinationTopicName", description = "The Pulsar
topic to which data is sent")
protected String destinationTopicName;
- @Parameter(names = "--deserializationClassName", description = "The
classname for SerDe class for the source")
+ @Parameter(names = "--deserializationClassName", description = "The
SerDe classname for the source")
protected String deserializationClassName;
@Parameter(names = "--parallelism", description = "The source's
parallelism factor (i.e. the number of source instances to run)")
protected Integer parallelism;
- @Parameter(names = "--jar", description = "Path to the jar file for
the Source. It also supports url-path [http/https/file (file protocol assumes
that file already exists on worker host)] from which worker can download the
package.", listConverter = StringConverter.class)
+ @Parameter(names = "--jar", description = "The path to the jar file
for the Source. It also supports url-path [http/https/file (file protocol
assumes that file already exists on worker host)] from which worker can
download the package.", listConverter = StringConverter.class)
protected String jarFile;
-
@Parameter(names = "--sourceConfigFile", description = "The path to a
YAML config file specifying the "
+ "source's configuration")
protected String sourceConfigFile;
- @Parameter(names = "--cpu", description = "The cpu in cores that need
to be allocated per function instance(applicable only to docker runtime)")
+ @Parameter(names = "--cpu", description = "The CPU (in cores) that
needs to be allocated per source instance (applicable only to Docker runtime)")
protected Double cpu;
- @Parameter(names = "--ram", description = "The ram in bytes that need
to be allocated per function instance(applicable only to process/docker
runtime)")
+ @Parameter(names = "--ram", description = "The RAM (in bytes) that
need to be allocated per source instance (applicable only to the process and
Docker runtimes)")
protected Long ram;
- @Parameter(names = "--disk", description = "The disk in bytes that
need to be allocated per function instance(applicable only to docker runtime)")
+ @Parameter(names = "--disk", description = "The disk (in bytes) that
need to be allocated per source instance (applicable only to Docker runtime)")
protected Long disk;
@Parameter(names = "--sourceConfig", description = "Source config
key/values")
protected String sourceConfigString;
@@ -391,7 +386,7 @@ public class CmdSources extends CmdBase {
}
}
- @Parameters(commandDescription = "Stops a Pulsar source")
+ @Parameters(commandDescription = "Stops a Pulsar IO source connector")
class DeleteSource extends BaseCommand {
@Parameter(names = "--tenant", description = "The tenant of a sink or
source")
diff --git a/site/_data/cli/pulsar-admin.yaml b/site/_data/cli/pulsar-admin.yaml
index 46fc0a4..c0d483f 100644
--- a/site/_data/cli/pulsar-admin.yaml
+++ b/site/_data/cli/pulsar-admin.yaml
@@ -510,6 +510,164 @@ commands:
argument: cluster-name
- name: delete
description: Delete namespace isolation policy of a cluster. This
operation requires superuser privileges.
+- name: sink
+ description: An interface for managing Pulsar IO sinks (egress data from
Pulsar)
+ subcommands:
+ - name: create
+ description: Submit a Pulsar IO sink connector to run in a Pulsar cluster
+ options:
+ - flags: --className
+ description: The sink's Java class name
+ - flags: --cpu
+ description: The CPU (in cores) that needs to be allocated per sink
instance (applicable only to the Docker runtime)
+ - flags: --customSerdeInputs
+ description: The map of input topics to SerDe class names (as a JSON
string)
+ - flags: --disk
+ description: The disk (in bytes) that needs to be allocated per sink
instance (applicable only to the Docker runtime)
+ - flags: --inputs
+ description: The sink's input topic(s) (multiple topics can be specified
as a comma-separated list)
+ - flags: --jar
+ description: Path to the Java jar file for the sink
+ - flags: --name
+ description: The sink's name
+ - flags: --namespace
+ description: The sink's namespace
+ - flags: --parallelism
+ description: |
+ "The sink's parallelism factor (i.e. the number of sink instances to
run)."
+ - flags: --processingGuarantees
+ description: |
+ "The processing guarantees (aka delivery semantics) applied to the
sink. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+ - flags: --ram
+ description: The RAM (in bytes) that needs to be allocated per sink
instance (applicable only to the Docker runtime)
+ - flags: --sinkConfig
+ description: Sink config key/values
+ - flags: --sinkConfigFile
+ description: The path to a YAML config file specifying the sink's
configuration
+ - flags: --tenant
+ description: The sink's tenant
+ - name: delete
+ description: Stops a Pulsar IO sink
+ options:
+ - flags: --name
+ description: The name of the sink
+ - flags: --namespace
+ description: The namespace of the sink
+ - flags: --tenant
+ description: The tenant of the sink
+ - name: localrun
+ description: Run the Pulsar sink locally (rather than in the Pulsar
cluster)
+ options:
+ - flags: --brokerServiceUrl
+ description: The URL for the Pulsar broker
+ - flags: --className
+ description: The sink's Java class name
+ - flags: --cpu
+ description: The CPU (in cores) that needs to be allocated per sink
instance (applicable only to the Docker runtime)
+ - flags: --customSerdeInputs
+ description: The map of input topics to SerDe class names (as a JSON
string)
+ - flags: --disk
+ description: The disk (in bytes) that needs to be allocated per sink
instance (applicable only to the Docker runtime)
+ - flags: --inputs
+ description: The sink's input topic(s) (multiple topics can be specified
as a comma-separated list)
+ - flags: --jar
+ description: Path to the Java jar file for the sink
+ - flags: --name
+ description: The sink's name
+ - flags: --namespace
+ description: The sink's namespace
+ - flags: --parallelism
+ description: |
+ "The sink's parallelism factor (i.e. the number of sink instances to
run)."
+ - flags: --processingGuarantees
+ description: |
+ "The processing guarantees (aka delivery semantics) applied to the
sink. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+ - flags: --ram
+ description: The RAM (in bytes) that needs to be allocated per sink
instance (applicable only to the Docker runtime)
+ - flags: --sinkConfig
+ description: Sink config key/values
+ - flags: --sinkConfigFile
+ description: The path to a YAML config file specifying the sink's
configuration
+ - flags: --tenant
+ description: The sink's tenant
+- name: source
+ description: An interface for managing Pulsar IO sources (ingress data into
Pulsar)
+ subcommands:
+ - name: create
+ description: Submit a Pulsar IO source connector to run in a Pulsar cluster
+ options:
+ - flags: --className
+ description: The source's Java class name
+ - flags: --cpu
+ description: The CPU (in cores) that needs to be allocated per source
instance (applicable only to the Docker runtime)
+ - flags: --deserializationClassName
+ description: The SerDe classname for the source
+ - flags: --destinationTopicName
+ description: The Pulsar topic to which data is sent
+ - flags: --disk
+ description: The disk (in bytes) that needs to be allocated per source
instance (applicable only to the Docker runtime)
+ - flags: --jar
+ description: Path to the Java jar file for the source
+ - flags: --name
+ description: The source's name
+ - flags: --namespace
+ description: The source's namespace
+ - flags: --parallelism
+ description: |
+ The source's parallelism factor (i.e. the number of source instances
to run).
+ - flags: --processingGuarantees
+ description: |
+ "The processing guarantees (aka delivery semantics) applied to the
source. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+ - flags: --ram
+ description: The RAM (in bytes) that needs to be allocated per source
instance (applicable only to the Docker runtime)
+ - flags: --sourceConfig
+ description: Source config key/values
+ - flags: --sourceConfigFile
+ description: The path to a YAML config file specifying the source's
configuration
+ - flags: --tenant
+ description: The source's tenant
+ - name: delete
+ description: Stops a Pulsar IO source
+ options:
+ - flags: --name
+ description: The name of the source
+ - flags: --namespace
+ description: The namespace of the source
+ - flags: --tenant
+ description: The tenant of the source
+ - name: localrun
+ description: Run the Pulsar source locally (rather than in the Pulsar
cluster)
+ options:
+ - flags: --className
+ description: The source's Java class name
+ - flags: --cpu
+ description: The CPU (in cores) that needs to be allocated per source
instance (applicable only to the Docker runtime)
+ - flags: --deserializationClassName
+ description: The SerDe classname for the source
+ - flags: --destinationTopicName
+ description: The Pulsar topic to which data is sent
+ - flags: --disk
+ description: The disk (in bytes) that needs to be allocated per source
instance (applicable only to the Docker runtime)
+ - flags: --jar
+ description: Path to the Java jar file for the source
+ - flags: --name
+ description: The source's name
+ - flags: --namespace
+ description: The source's namespace
+ - flags: --parallelism
+ description: |
+ The source's parallelism factor (i.e. the number of source instances
to run).
+ - flags: --processingGuarantees
+ description: |
+ "The processing guarantees (aka delivery semantics) applied to the
source. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+ - flags: --ram
+ description: The RAM (in bytes) that needs to be allocated per source
instance (applicable only to the Docker runtime)
+ - flags: --sourceConfig
+ description: Source config key/values
+ - flags: --sourceConfigFile
+ description: The path to a YAML config file specifying the source's
configuration
+ - flags: --tenant
+ description: The source's tenant
- name: topics
description: Operations for managing Pulsar topics (both persistent and non
persistent)
subcommands:
diff --git a/site/_data/connectors.yaml b/site/_data/connectors.yaml
new file mode 100644
index 0000000..37d332c
--- /dev/null
+++ b/site/_data/connectors.yaml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+- name: Aerospike sink
+ url: https://www.aerospike.com/
+ class:
+ group: aerospike
+ name: AerospikeSink
+- name: Cassandra sink
+ url: https://cassandra.apache.org
+ class:
+ group: cassandra
+ name: CassandraSink
+- name: Kafka source
+ url: https://kafka.apache.org
+ class:
+ group: kafka
+ name: KafkaSource
+- name: Kafka sink
+ url: https://kafka.apache.org
+ class:
+ group: kafka
+ name: KafkaSink
+- name: RabbitMQ source
+ url: https://www.rabbitmq.com
+ class:
+ group: rabbitmq
+ name: RabbitMQSource
+- name: Twitter Firehose source
+ url: https://developer.twitter.com/en/docs
+ class:
+ group: twitter
+ name: TwitterFireHose
\ No newline at end of file
diff --git a/site/_data/sidebar.yaml b/site/_data/sidebar.yaml
index b061e44..cf08760 100644
--- a/site/_data/sidebar.yaml
+++ b/site/_data/sidebar.yaml
@@ -136,6 +136,8 @@ groups:
- title: Cookbooks
dir: cookbooks
docs:
+ - title: Pulsar IO
+ endpoint: pulsar-io
- title: Tiered Storage
endpoint: tiered-storage
- title: Topic compaction
diff --git a/site/_includes/figure.html b/site/_includes/connectors.html
similarity index 53%
copy from site/_includes/figure.html
copy to site/_includes/connectors.html
index cfda529..99c303a 100644
--- a/site/_includes/figure.html
+++ b/site/_includes/connectors.html
@@ -18,4 +18,27 @@
under the License.
-->
-<img src="{{ include.src }}"{{ if include.width }} width="{{ include.width
}}%"{{ endif }}{{ if include.alt }} alt="{{ include.alt }}"{{ endif }}>
\ No newline at end of file
+<table>
+ <thead>
+ <tr>
+ <th>Name</th>
+ <th>Java class</th>
+ </tr>
+ </thead>
+ <tbody>
+ {% for connector in site.data.connectors %}
+ <tr>
+ <td>
+ <a href="{{ connector.url }}">
+ {{ connector.name }}
+ </a>
+ </td>
+ <td>
+ <a
href="https://github.com/apache/incubator-pulsar/blob/master/pulsar-io/{{
connector.class.group }}/src/main/java/org/apache/pulsar/connect/{{
connector.class.group }}/{{ connector.class.name }}.java">
+ <code>org.apache.pulsar.io.{{ connector.class.group }}.{{
connector.class.name }}</code>
+ </a>
+ </td>
+ </tr>
+ {% endfor %}
+ </tbody>
+</table>
\ No newline at end of file
diff --git a/site/_includes/figure.html b/site/_includes/figure.html
index cfda529..b69fda5 100644
--- a/site/_includes/figure.html
+++ b/site/_includes/figure.html
@@ -18,4 +18,11 @@
under the License.
-->
-<img src="{{ include.src }}"{{ if include.width }} width="{{ include.width
}}%"{{ endif }}{{ if include.alt }} alt="{{ include.alt }}"{{ endif }}>
\ No newline at end of file
+<figure>
+ <img src="{{ include.src }}"{% if include.width %} width="{{ include.width
}}%"{% endif %}{% if include.alt %} alt="{{ include.alt }}"{% endif %}>
+ {% if include.caption %}
+ <figcaption>
+ {{ include.caption }}
+ </figcaption>
+ {% endif %}
+</figure>
diff --git a/site/_sass/_docs.scss b/site/_sass/_docs.scss
index 8ce3e57..111f9db 100644
--- a/site/_sass/_docs.scss
+++ b/site/_sass/_docs.scss
@@ -162,6 +162,24 @@
margin-right: 2px;
}
+ figure {
+ margin: 2rem 0;
+
+ img {
+ display: block;
+ max-width: 100%;
+ margin: 0 auto;
+ }
+
+ figcaption {
+ text-align: center;
+ margin-top: 1rem;
+ font-size: 1.5rem;
+ font-weight: 500;
+ }
+ }
+
+
p img {
display: block;
margin: 20px auto;
diff --git a/site/docs/latest/cookbooks/pulsar-io.md
b/site/docs/latest/cookbooks/pulsar-io.md
new file mode 100644
index 0000000..a5bf0ee
--- /dev/null
+++ b/site/docs/latest/cookbooks/pulsar-io.md
@@ -0,0 +1,82 @@
+---
+title: The Pulsar IO cookbook
+---
+
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+[Pulsar IO](../../getting-started/ConceptsAndArchitecture#pulsar-io) is a
feature of Pulsar that enables you to easily create and manage **connectors**
that interface with external systems, such as databases and other messaging
systems.
+
+## Setup
+
+In order to run Pulsar IO connectors, you'll need to have a binary
distribution of pulsar locally.
+
+## Managing connectors
+
+Pulsar connectors can be managed using the
[`source`](../../reference/CliTools#pulsar-admin-source) and
[`sink`](../../reference/CliTools#pulsar-admin-sink) commands of the
[`pulsar-admin`](../../reference/CliTools#pulsar-admin) CLI tool.
+
+### Running sources
+
+You can use the [`create`](../../reference/CliTools#pulsar-admin-source-create)
+
+You can submit a sink to be run in an existing Pulsar cluster using a command
of this form:
+
+```bash
+$ ./bin/pulsar-admin sink create --className <classname> --jar <jar-location>
--tenant test --namespace <namespace> --name <sink-name> --inputs <input-topics>
+```
+
+Here’s an example command:
+
+```bash
+bin/pulsar-admin source create --className
org.apache.pulsar.io.twitter.TwitterFireHose --jar ~/application.jar --tenant
test --namespace ns1 --name twitter-source --destinationTopicName twitter_data
+```
+
+Instead of submitting a source to run on an existing Pulsar cluster, you
alternatively can run a source as a process on your local machine:
+
+```bash
+bin/pulsar-admin source localrun --className
org.apache.pulsar.io.twitter.TwitterFireHose --jar ~/application.jar --tenant
test --namespace ns1 --name twitter-source --destinationTopicName twitter_data
+```
+
+### Running Sinks
+
+You can submit a sink to be run in an existing Pulsar cluster using a command
of this form:
+
+```bash
+./bin/pulsar-admin sink create --className <classname> --jar <jar-location>
--tenant test --namespace <namespace> --name <sink-name> --inputs <input-topics>
+```
+
+Here’s an example command:
+
+```bash
+./bin/pulsar-admin sink create --className org.apache.pulsar.io.cassandra
--jar ~/application.jar --tenant test --namespace ns1 --name cassandra-sink
--inputs test_topic
+```
+
+Instead of submitting a sink to run on an existing Pulsar cluster, you
alternatively can run a sink as a process on your local machine:
+
+```bash
+./bin/pulsar-admin sink localrun --className org.apache.pulsar.io.cassandra
--jar ~/application.jar --tenant test --namespace ns1 --name cassandra-sink
--inputs test_topic
+```
+
+## Available connectors
+
+At the moment, the following connectors are available for Pulsar:
+
+{% include connectors.html %}
diff --git a/site/docs/latest/getting-started/ConceptsAndArchitecture.md
b/site/docs/latest/getting-started/ConceptsAndArchitecture.md
index e07c5f9..47e73ff 100644
--- a/site/docs/latest/getting-started/ConceptsAndArchitecture.md
+++ b/site/docs/latest/getting-started/ConceptsAndArchitecture.md
@@ -585,6 +585,34 @@ Pulsar currently supports S3 as a long term store.
Offloading to S3 triggered vi
{% include admonition.html type="info" content="For a guide for setting up
tiered storage, see the [Tiered storage
cookbook](../../cookbooks/tiered-storage)." %}
+## Pulsar IO
+
+Messaging systems are most powerful when you can easily use them in
conjunction with external systems like databases and other messaging systems.
**Pulsar IO** is a feature of Pulsar that enables you to easily create, deploy,
and manage Pulsar **connectors** that interact with external systems, such as
[Apache Cassandra](https://cassandra.apache.org),
[Aerospike](https://www.aerospike.com), and many others.
+
+{% include admonition.html type="info" title="Pulsar IO and Pulsar Functions"
+ content="Under the hood, Pulsar IO connectors are specialized [Pulsar
Functions](#pulsar-functions) purpose-built to interface with external systems.
The [administrative interface](../../cookbooks/pulsar-io) for Pulsar IO is, in
fact, quite similar to that of Pulsar Functions." %}
+
+### Sources and sinks
+
+Pulsar IO connectors come in two types:
+
+* **Sources** feed data *into* Pulsar from other systems. Common sources
include other messaging systems and "firehose"-style data pipeline APIs.
+* **Sinks** are fed data *from* Pulsar. Common sinks include other messaging
systems and SQL and NoSQL databases.
+
+This diagram illustrates the relationship between sources, sinks, and Pulsar:
+
+{% include figure.html src="/img/pulsar-io.png" alt="Pulsar IO diagram"
caption="Pulsar IO connectors (sources and sinks)" width="80" %}
+
+### Working with connectors
+
+Pulsar IO connectors can be managed via the
[`pulsar-admin`](../../reference/CliTools#pulsar-admin) CLI tool, in particular
the [`source`](../../reference/CliTools#pulsar-admin-source) and
[`sink`](../../reference/CliTools#pulsar-admin-sink) commands.
+
+{% include admonition.html type="info" content="For a guide to managing
connectors in your Pulsar installation, see the [Pulsar IO
cookbook](../../cookbooks/pulsar-io#managing-connectors)." %}
+
+The following connectors are currently available for Pulsar:
+
+{% include connectors.html %}
+
## Schema registry
Type safety is extremely important in any application built around a message
bus like Pulsar. {% popover Producers %} and {% popover consumers %} need some
kind of mechanism for coordinating types at the {% popover topic %} level lest
a wide variety of potential problems arise (for example serialization and
deserialization issues). Applications typically adopt one of two basic
approaches to type safety in messaging:
diff --git a/site/img/pulsar-io.png b/site/img/pulsar-io.png
new file mode 100644
index 0000000..3e74d4b
Binary files /dev/null and b/site/img/pulsar-io.png differ