This is an automated email from the ASF dual-hosted git repository.

sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pulsar.git


The following commit(s) were added to refs/heads/master by this push:
     new f7c5696   adding pulsar io docs (#2001)
f7c5696 is described below

commit f7c569614d7d1cdccd306ff5aad3ed7c21e53f2a
Author: Boyang Jerry Peng <[email protected]>
AuthorDate: Mon Jun 25 10:29:21 2018 -0700

     adding pulsar io docs (#2001)
---
 .../java/org/apache/pulsar/admin/cli/CmdSinks.java |  19 ++-
 .../org/apache/pulsar/admin/cli/CmdSources.java    |  27 ++--
 site/_data/cli/pulsar-admin.yaml                   | 158 +++++++++++++++++++++
 site/_data/connectors.yaml                         |  49 +++++++
 site/_data/sidebar.yaml                            |   2 +
 site/_includes/{figure.html => connectors.html}    |  25 +++-
 site/_includes/figure.html                         |   9 +-
 site/_sass/_docs.scss                              |  18 +++
 site/docs/latest/cookbooks/pulsar-io.md            |  82 +++++++++++
 .../getting-started/ConceptsAndArchitecture.md     |  28 ++++
 site/img/pulsar-io.png                             | Bin 0 -> 37316 bytes
 11 files changed, 389 insertions(+), 28 deletions(-)

diff --git 
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java 
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
index b5993e3..9d4595b 100644
--- 
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
+++ 
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSinks.java
@@ -64,7 +64,7 @@ import static 
org.apache.pulsar.functions.utils.Utils.getSinkType;
 import static org.apache.pulsar.functions.worker.Utils.downloadFromHttpUrl;
 
 @Getter
-@Parameters(commandDescription = "Interface for managing Pulsar Sinks (Egress 
data from Pulsar)")
+@Parameters(commandDescription = "Interface for managing Pulsar IO sinks 
(egress data from Pulsar)")
 public class CmdSinks extends CmdBase {
 
     private final CreateSink createSink;
@@ -102,7 +102,7 @@ public class CmdSinks extends CmdBase {
         abstract void runCmd() throws Exception;
     }
 
-    @Parameters(commandDescription = "Run the Pulsar sink locally (rather than 
deploying it to the Pulsar cluster)")
+    @Parameters(commandDescription = "Run a Pulsar IO sink connector locally 
(rather than deploying it to the Pulsar cluster)")
     class LocalSinkRunner extends CreateSink {
 
         @Parameter(names = "--brokerServiceUrl", description = "The URL for 
the Pulsar broker")
@@ -139,7 +139,7 @@ public class CmdSinks extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Create Pulsar sink connectors")
+    @Parameters(commandDescription = "Submit a Pulsar IO sink connector to run 
in a Pulsar cluster")
     class CreateSink extends SinkCommand {
         @Override
         void runCmd() throws Exception {
@@ -152,7 +152,7 @@ public class CmdSinks extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Update Pulsar sink connectors")
+    @Parameters(commandDescription = "Update a Pulsar IO sink connector")
     class UpdateSink extends SinkCommand {
         @Override
         void runCmd() throws Exception {
@@ -165,7 +165,6 @@ public class CmdSinks extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Create Pulsar sink connectors")
     abstract class SinkCommand extends BaseCommand {
         @Parameter(names = "--tenant", description = "The sink's tenant")
         protected String tenant;
@@ -181,7 +180,7 @@ public class CmdSinks extends CmdBase {
         protected String topicsPattern;
         @Parameter(names = "--customSerdeInputs", description = "The map of 
input topics to SerDe class names (as a JSON string)")
         protected String customSerdeInputString;
-        @Parameter(names = "--processingGuarantees", description = "The 
processing guarantees (aka delivery semantics) applied to the Sink")
+        @Parameter(names = "--processingGuarantees", description = "The 
processing guarantees (aka delivery semantics) applied to the sink")
         protected FunctionConfig.ProcessingGuarantees processingGuarantees;
         @Parameter(names = "--parallelism", description = "The sink's 
parallelism factor (i.e. the number of sink instances to run)")
         protected Integer parallelism;
@@ -191,11 +190,11 @@ public class CmdSinks extends CmdBase {
         @Parameter(names = "--sinkConfigFile", description = "The path to a 
YAML config file specifying the "
                 + "sink's configuration")
         protected String sinkConfigFile;
-        @Parameter(names = "--cpu", description = "The cpu in cores that need 
to be allocated per function instance(applicable only to docker runtime)")
+        @Parameter(names = "--cpu", description = "The CPU (in cores) that 
needs to be allocated per sink instance (applicable only to Docker runtime)")
         protected Double cpu;
-        @Parameter(names = "--ram", description = "The ram in bytes that need 
to be allocated per function instance(applicable only to process/docker 
runtime)")
+        @Parameter(names = "--ram", description = "The RAM (in bytes) that 
need to be allocated per sink instance (applicable only to the process and 
Docker runtimes)")
         protected Long ram;
-        @Parameter(names = "--disk", description = "The disk in bytes that 
need to be allocated per function instance(applicable only to docker runtime)")
+        @Parameter(names = "--disk", description = "The disk (in bytes) that 
need to be allocated per sink instance (applicable only to Docker runtime)")
         protected Long disk;
         @Parameter(names = "--sinkConfig", description = "Sink config 
key/values")
         protected String sinkConfigString;
@@ -411,7 +410,7 @@ public class CmdSinks extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Stops a Pulsar sink or source")
+    @Parameters(commandDescription = "Stops a Pulsar IO sink connector")
     class DeleteSink extends BaseCommand {
 
         @Parameter(names = "--tenant", description = "The tenant of the sink")
diff --git 
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java 
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
index b3e49fd..2ff338d 100644
--- 
a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
+++ 
b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdSources.java
@@ -42,7 +42,6 @@ import org.apache.pulsar.functions.utils.SourceConfig;
 import org.apache.pulsar.functions.utils.Utils;
 import org.apache.pulsar.functions.utils.validation.ConfigValidation;
 import 
org.apache.pulsar.functions.utils.validation.ValidatorImpls.ImplementsClassValidator;
-import org.apache.pulsar.io.core.Sink;
 import org.apache.pulsar.io.core.Source;
 
 import java.io.File;
@@ -52,18 +51,15 @@ import java.lang.reflect.Type;
 import java.net.MalformedURLException;
 import java.util.Map;
 
-import static org.apache.commons.lang3.StringUtils.isBlank;
-import static org.apache.commons.lang3.StringUtils.isNotBlank;
 import static org.apache.pulsar.common.naming.TopicName.DEFAULT_NAMESPACE;
 import static org.apache.pulsar.common.naming.TopicName.PUBLIC_TENANT;
 import static 
org.apache.pulsar.functions.utils.Utils.convertProcessingGuarantee;
 import static org.apache.pulsar.functions.utils.Utils.fileExists;
-import static org.apache.pulsar.functions.utils.Utils.getSinkType;
 import static org.apache.pulsar.functions.utils.Utils.getSourceType;
 import static org.apache.pulsar.functions.worker.Utils.downloadFromHttpUrl;
 
 @Getter
-@Parameters(commandDescription = "Interface for managing Pulsar Source 
(Ingress data to Pulsar)")
+@Parameters(commandDescription = "Interface for managing Pulsar IO Sources 
(ingress data into Pulsar)")
 public class CmdSources extends CmdBase {
 
     private final CreateSource createSource;
@@ -101,7 +97,7 @@ public class CmdSources extends CmdBase {
         abstract void runCmd() throws Exception;
     }
 
-    @Parameters(commandDescription = "Run the Pulsar source locally (rather 
than deploying it to the Pulsar cluster)")
+    @Parameters(commandDescription = "Run a Pulsar IO source connector locally 
(rather than deploying it to the Pulsar cluster)")
     class LocalSourceRunner extends CreateSource {
 
         @Parameter(names = "--brokerServiceUrl", description = "The URL for 
the Pulsar broker")
@@ -138,7 +134,7 @@ public class CmdSources extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Create Pulsar source connectors")
+    @Parameters(commandDescription = "Submit a Pulsar IO source connector to 
run in a Pulsar cluster")
     public class CreateSource extends SourceCommand {
         @Override
         void runCmd() throws Exception {
@@ -151,7 +147,7 @@ public class CmdSources extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Update Pulsar source connectors")
+    @Parameters(commandDescription = "Update a Pulsar IO source connector")
     public class UpdateSource extends SourceCommand {
         @Override
         void runCmd() throws Exception {
@@ -175,23 +171,22 @@ public class CmdSources extends CmdBase {
         protected FunctionConfig.ProcessingGuarantees processingGuarantees;
         @Parameter(names = "--className", description = "The source's class 
name")
         protected String className;
-        @Parameter(names = "--destinationTopicName", description = "Pulsar 
topic to ingress data to")
+        @Parameter(names = "--destinationTopicName", description = "The Pulsar 
topic to which data is sent")
         protected String destinationTopicName;
-        @Parameter(names = "--deserializationClassName", description = "The 
classname for SerDe class for the source")
+        @Parameter(names = "--deserializationClassName", description = "The 
SerDe classname for the source")
         protected String deserializationClassName;
         @Parameter(names = "--parallelism", description = "The source's 
parallelism factor (i.e. the number of source instances to run)")
         protected Integer parallelism;
-        @Parameter(names = "--jar", description = "Path to the jar file for 
the Source. It also supports url-path [http/https/file (file protocol assumes 
that file already exists on worker host)] from which worker can download the 
package.", listConverter = StringConverter.class)
+        @Parameter(names = "--jar", description = "The path to the jar file 
for the Source. It also supports url-path [http/https/file (file protocol 
assumes that file already exists on worker host)] from which worker can 
download the package.", listConverter = StringConverter.class)
         protected String jarFile;
-
         @Parameter(names = "--sourceConfigFile", description = "The path to a 
YAML config file specifying the "
                 + "source's configuration")
         protected String sourceConfigFile;
-        @Parameter(names = "--cpu", description = "The cpu in cores that need 
to be allocated per function instance(applicable only to docker runtime)")
+        @Parameter(names = "--cpu", description = "The CPU (in cores) that 
needs to be allocated per source instance (applicable only to Docker runtime)")
         protected Double cpu;
-        @Parameter(names = "--ram", description = "The ram in bytes that need 
to be allocated per function instance(applicable only to process/docker 
runtime)")
+        @Parameter(names = "--ram", description = "The RAM (in bytes) that 
need to be allocated per source instance (applicable only to the process and 
Docker runtimes)")
         protected Long ram;
-        @Parameter(names = "--disk", description = "The disk in bytes that 
need to be allocated per function instance(applicable only to docker runtime)")
+        @Parameter(names = "--disk", description = "The disk (in bytes) that 
need to be allocated per source instance (applicable only to Docker runtime)")
         protected Long disk;
         @Parameter(names = "--sourceConfig", description = "Source config 
key/values")
         protected String sourceConfigString;
@@ -391,7 +386,7 @@ public class CmdSources extends CmdBase {
         }
     }
 
-    @Parameters(commandDescription = "Stops a Pulsar source")
+    @Parameters(commandDescription = "Stops a Pulsar IO source connector")
     class DeleteSource extends BaseCommand {
 
         @Parameter(names = "--tenant", description = "The tenant of a sink or 
source")
diff --git a/site/_data/cli/pulsar-admin.yaml b/site/_data/cli/pulsar-admin.yaml
index 46fc0a4..c0d483f 100644
--- a/site/_data/cli/pulsar-admin.yaml
+++ b/site/_data/cli/pulsar-admin.yaml
@@ -510,6 +510,164 @@ commands:
     argument: cluster-name
   - name: delete
     description: Delete namespace isolation policy of a cluster. This 
operation requires superuser privileges.
+- name: sink
+  description: An interface for managing Pulsar IO sinks (egress data from 
Pulsar)
+  subcommands:
+  - name: create
+    description: Submit a Pulsar IO sink connector to run in a Pulsar cluster
+    options:
+    - flags: --className
+      description: The sink's Java class name
+    - flags: --cpu
+      description: The CPU (in cores) that needs to be allocated per sink 
instance (applicable only to the Docker runtime)
+    - flags: --customSerdeInputs
+      description: The map of input topics to SerDe class names (as a JSON 
string)
+    - flags: --disk
+      description: The disk (in bytes) that needs to be allocated per sink 
instance (applicable only to the Docker runtime)
+    - flags: --inputs
+      description: The sink's input topic(s) (multiple topics can be specified 
as a comma-separated list)
+    - flags: --jar
+      description: Path to the Java jar file for the sink
+    - flags: --name
+      description: The sink's name
+    - flags: --namespace
+      description: The sink's namespace
+    - flags: --parallelism
+      description: |
+        "The sink's parallelism factor (i.e. the number of sink instances to 
run)."
+    - flags: --processingGuarantees
+      description: |
+        "The processing guarantees (aka delivery semantics) applied to the 
sink. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+    - flags: --ram
+      description: The RAM (in bytes) that needs to be allocated per sink 
instance (applicable only to the process and Docker runtimes)
+    - flags: --sinkConfig
+      description: Sink config key/values
+    - flags: --sinkConfigFile
+      description: The path to a YAML config file specifying the sink's 
configuration
+    - flags: --tenant
+      description: The sink's tenant
+  - name: delete
+    description: Stops a Pulsar IO sink
+    options:
+    - flags: --name
+      description: The name of the sink
+    - flags: --namespace
+      description: The namespace of the sink
+    - flags: --tenant
+      description: The tenant of the sink
+  - name: localrun
+    description: Run the Pulsar sink locally (rather than in the Pulsar 
cluster)
+    options:
+    - flags: --brokerServiceUrl
+      description: The URL for the Pulsar broker
+    - flags: --className
+      description: The sink's Java class name
+    - flags: --cpu
+      description: The CPU (in cores) that needs to be allocated per sink 
instance (applicable only to the Docker runtime)
+    - flags: --customSerdeInputs
+      description: The map of input topics to SerDe class names (as a JSON 
string)
+    - flags: --disk
+      description: The disk (in bytes) that needs to be allocated per sink 
instance (applicable only to the Docker runtime)
+    - flags: --inputs
+      description: The sink's input topic(s) (multiple topics can be specified 
as a comma-separated list)
+    - flags: --jar
+      description: Path to the Java jar file for the sink
+    - flags: --name
+      description: The sink's name
+    - flags: --namespace
+      description: The sink's namespace
+    - flags: --parallelism
+      description: |
+        "The sink's parallelism factor (i.e. the number of sink instances to 
run)."
+    - flags: --processingGuarantees
+      description: |
+        "The processing guarantees (aka delivery semantics) applied to the 
sink. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+    - flags: --ram
+      description: The RAM (in bytes) that needs to be allocated per sink 
instance (applicable only to the process and Docker runtimes)
+    - flags: --sinkConfig
+      description: Sink config key/values
+    - flags: --sinkConfigFile
+      description: The path to a YAML config file specifying the sink's 
configuration
+    - flags: --tenant
+      description: The sink's tenant
+- name: source
+  description: An interface for managing Pulsar IO sources (ingress data into 
Pulsar)
+  subcommands:
+  - name: create
+    description: Submit a Pulsar IO source connector to run in a Pulsar cluster
+    options:
+    - flags: --className
+      description: The source's Java class name
+    - flags: --cpu
+      description: The CPU (in cores) that needs to be allocated per source 
instance (applicable only to the Docker runtime)
+    - flags: --deserializationClassName
+      description: The SerDe classname for the source
+    - flags: --destinationTopicName
+      description: The Pulsar topic to which data is sent
+    - flags: --disk
+      description: The disk (in bytes) that needs to be allocated per source 
instance (applicable only to the Docker runtime)
+    - flags: --jar
+      description: Path to the Java jar file for the source
+    - flags: --name
+      description: The source's name
+    - flags: --namespace
+      description: The source's namespace
+    - flags: --parallelism
+      description: |
+        The source's parallelism factor (i.e. the number of source instances 
to run).
+    - flags: --processingGuarantees
+      description: |
+        "The processing guarantees (aka delivery semantics) applied to the 
source. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+    - flags: --ram
+      description: The RAM (in bytes) that needs to be allocated per source 
instance (applicable only to the process and Docker runtimes)
+    - flags: --sourceConfig
+      description: Source config key/values
+    - flags: --sourceConfigFile
+      description: The path to a YAML config file specifying the source's 
configuration
+    - flags: --tenant
+      description: The source's tenant
+  - name: delete
+    description: Stops a Pulsar IO source
+    options:
+    - flags: --name
+      description: The name of the source
+    - flags: --namespace
+      description: The namespace of the source
+    - flags: --tenant
+      description: The tenant of the source
+  - name: localrun
+    description: Run the Pulsar source locally (rather than in the Pulsar 
cluster)
+    options:
+    - flags: --className
+      description: The source's Java class name
+    - flags: --cpu
+      description: The CPU (in cores) that needs to be allocated per source 
instance (applicable only to the Docker runtime)
+    - flags: --deserializationClassName
+      description: The SerDe classname for the source
+    - flags: --destinationTopicName
+      description: The Pulsar topic to which data is sent
+    - flags: --disk
+      description: The disk (in bytes) that needs to be allocated per source 
instance (applicable only to the Docker runtime)
+    - flags: --jar
+      description: Path to the Java jar file for the source
+    - flags: --name
+      description: The source's name
+    - flags: --namespace
+      description: The source's namespace
+    - flags: --parallelism
+      description: |
+        The source's parallelism factor (i.e. the number of source instances 
to run).
+    - flags: --processingGuarantees
+      description: |
+        "The processing guarantees (aka delivery semantics) applied to the 
source. Available values: `ATLEAST_ONCE`, `ATMOST_ONCE`, `EFFECTIVELY_ONCE`."
+    - flags: --ram
+      description: The RAM (in bytes) that needs to be allocated per source 
instance (applicable only to the process and Docker runtimes)
+    - flags: --sourceConfig
+      description: Source config key/values
+    - flags: --sourceConfigFile
+      description: The path to a YAML config file specifying the source's 
configuration
+    - flags: --tenant
+      description: The source's tenant
 - name: topics
   description: Operations for managing Pulsar topics (both persistent and non 
persistent)
   subcommands:
diff --git a/site/_data/connectors.yaml b/site/_data/connectors.yaml
new file mode 100644
index 0000000..37d332c
--- /dev/null
+++ b/site/_data/connectors.yaml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+- name: Aerospike sink
+  url: https://www.aerospike.com/
+  class:
+    group: aerospike
+    name: AerospikeSink
+- name: Cassandra sink
+  url: https://cassandra.apache.org
+  class:
+    group: cassandra
+    name: CassandraSink
+- name: Kafka source
+  url: https://kafka.apache.org
+  class:
+    group: kafka
+    name: KafkaSource
+- name: Kafka sink
+  url: https://kafka.apache.org
+  class:
+    group: kafka
+    name: KafkaSink
+- name: RabbitMQ source
+  url: https://www.rabbitmq.com
+  class:
+    group: rabbitmq
+    name: RabbitMQSource
+- name: Twitter Firehose source
+  url: https://developer.twitter.com/en/docs
+  class:
+    group: twitter
+    name: TwitterFireHose
\ No newline at end of file
diff --git a/site/_data/sidebar.yaml b/site/_data/sidebar.yaml
index b061e44..cf08760 100644
--- a/site/_data/sidebar.yaml
+++ b/site/_data/sidebar.yaml
@@ -136,6 +136,8 @@ groups:
 - title: Cookbooks
   dir: cookbooks
   docs:
+  - title: Pulsar IO
+    endpoint: pulsar-io
   - title: Tiered Storage
     endpoint: tiered-storage
   - title: Topic compaction
diff --git a/site/_includes/figure.html b/site/_includes/connectors.html
similarity index 53%
copy from site/_includes/figure.html
copy to site/_includes/connectors.html
index cfda529..99c303a 100644
--- a/site/_includes/figure.html
+++ b/site/_includes/connectors.html
@@ -18,4 +18,27 @@
     under the License.
 
 -->
-<img src="{{ include.src }}"{{ if include.width }} width="{{ include.width 
}}%"{{ endif }}{{ if include.alt }} alt="{{ include.alt }}"{{ endif }}>
\ No newline at end of file
+<table>
+    <thead>
+    <tr>
+        <th>Name</th>
+        <th>Java class</th>
+    </tr>
+    </thead>
+    <tbody>
+    {% for connector in site.data.connectors %}
+    <tr>
+        <td>
+            <a href="{{ connector.url }}">
+                {{ connector.name }}
+            </a>
+        </td>
+        <td>
+            <a 
href="https://github.com/apache/incubator-pulsar/blob/master/pulsar-io/{{ 
connector.class.group }}/src/main/java/org/apache/pulsar/io/{{ 
connector.class.group }}/{{ connector.class.name }}.java">
+                <code>org.apache.pulsar.io.{{ connector.class.group }}.{{ 
connector.class.name }}</code>
+            </a>
+        </td>
+    </tr>
+    {% endfor %}
+    </tbody>
+</table>
\ No newline at end of file
diff --git a/site/_includes/figure.html b/site/_includes/figure.html
index cfda529..b69fda5 100644
--- a/site/_includes/figure.html
+++ b/site/_includes/figure.html
@@ -18,4 +18,11 @@
     under the License.
 
 -->
-<img src="{{ include.src }}"{{ if include.width }} width="{{ include.width 
}}%"{{ endif }}{{ if include.alt }} alt="{{ include.alt }}"{{ endif }}>
\ No newline at end of file
+<figure>
+    <img src="{{ include.src }}"{% if include.width %} width="{{ include.width 
}}%"{% endif %}{% if include.alt %} alt="{{ include.alt }}"{% endif %}>
+    {% if include.caption %}
+    <figcaption>
+        {{ include.caption }}
+    </figcaption>
+    {% endif %}
+</figure>
diff --git a/site/_sass/_docs.scss b/site/_sass/_docs.scss
index 8ce3e57..111f9db 100644
--- a/site/_sass/_docs.scss
+++ b/site/_sass/_docs.scss
@@ -162,6 +162,24 @@
         margin-right: 2px;
       }
 
+      figure {
+          margin: 2rem 0;
+
+          img {
+            display: block;
+            max-width: 100%;
+            margin: 0 auto;
+          }
+
+          figcaption {
+            text-align: center;
+            margin-top: 1rem;
+            font-size: 1.5rem;
+            font-weight: 500;
+          }
+        }
+
+
       p img {
         display: block;
         margin: 20px auto;
diff --git a/site/docs/latest/cookbooks/pulsar-io.md 
b/site/docs/latest/cookbooks/pulsar-io.md
new file mode 100644
index 0000000..a5bf0ee
--- /dev/null
+++ b/site/docs/latest/cookbooks/pulsar-io.md
@@ -0,0 +1,82 @@
+---
+title: The Pulsar IO cookbook
+---
+
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+
+[Pulsar IO](../../getting-started/ConceptsAndArchitecture#pulsar-io) is a 
feature of Pulsar that enables you to easily create and manage **connectors** 
that interface with external systems, such as databases and other messaging 
systems.
+
+## Setup
+
+In order to run Pulsar IO connectors, you'll need to have a binary 
distribution of Pulsar locally.
+
+## Managing connectors
+
+Pulsar connectors can be managed using the 
[`source`](../../reference/CliTools#pulsar-admin-source) and 
[`sink`](../../reference/CliTools#pulsar-admin-sink) commands of the 
[`pulsar-admin`](../../reference/CliTools#pulsar-admin) CLI tool.
+
+### Running sources
+
+You can use the [`create`](../../reference/CliTools#pulsar-admin-source-create) 
subcommand of `pulsar-admin source` to run a source.
+
+You can submit a source to be run in an existing Pulsar cluster using a command 
of this form:
+
+```bash
+$ ./bin/pulsar-admin source create --className  <classname> --jar <jar-location> 
--tenant test --namespace <namespace> --name <source-name> 
--destinationTopicName <destination-topic>
+```
+```
+
+Here’s an example command:
+
+```bash
+bin/pulsar-admin source create --className  
org.apache.pulsar.io.twitter.TwitterFireHose --jar ~/application.jar --tenant 
test --namespace ns1 --name twitter-source --destinationTopicName twitter_data
+```
+
+Instead of submitting a source to run on an existing Pulsar cluster, you 
alternatively can run a source as a process on your local machine:
+
+```bash
+bin/pulsar-admin source localrun --className  
org.apache.pulsar.io.twitter.TwitterFireHose --jar ~/application.jar --tenant 
test --namespace ns1 --name twitter-source --destinationTopicName twitter_data
+```
+
+### Running sinks
+
+You can submit a sink to be run in an existing Pulsar cluster using a command 
of this form:
+
+```bash
+./bin/pulsar-admin sink create --className  <classname> --jar <jar-location> 
--tenant test --namespace <namespace> --name <sink-name> --inputs <input-topics>
+```
+
+Here’s an example command:
+
+```bash
+./bin/pulsar-admin sink create --className  
org.apache.pulsar.io.cassandra.CassandraSink --jar ~/application.jar --tenant 
test --namespace ns1 --name cassandra-sink --inputs test_topic
+```
+
+Instead of submitting a sink to run on an existing Pulsar cluster, you 
alternatively can run a sink as a process on your local machine:
+
+```bash
+./bin/pulsar-admin sink localrun --className  
org.apache.pulsar.io.cassandra.CassandraSink --jar ~/application.jar --tenant 
test --namespace ns1 --name cassandra-sink --inputs test_topic
+```
+
+## Available connectors
+
+At the moment, the following connectors are available for Pulsar:
+
+{% include connectors.html %}
diff --git a/site/docs/latest/getting-started/ConceptsAndArchitecture.md 
b/site/docs/latest/getting-started/ConceptsAndArchitecture.md
index e07c5f9..47e73ff 100644
--- a/site/docs/latest/getting-started/ConceptsAndArchitecture.md
+++ b/site/docs/latest/getting-started/ConceptsAndArchitecture.md
@@ -585,6 +585,34 @@ Pulsar currently supports S3 as a long term store. 
Offloading to S3 triggered vi
 
 {% include admonition.html type="info" content="For a guide for setting up 
tiered storage, see the [Tiered storage 
cookbook](../../cookbooks/tiered-storage)." %}
 
+## Pulsar IO
+
+Messaging systems are most powerful when you can easily use them in 
conjunction with external systems like databases and other messaging systems. 
**Pulsar IO** is a feature of Pulsar that enables you to easily create, deploy, 
and manage Pulsar **connectors** that interact with external systems, such as 
[Apache Cassandra](https://cassandra.apache.org), 
[Aerospike](https://www.aerospike.com), and many others.
+
+{% include admonition.html type="info" title="Pulsar IO and Pulsar Functions"
+   content="Under the hood, Pulsar IO connectors are specialized [Pulsar 
Functions](#pulsar-functions) purpose-built to interface with external systems. 
The [administrative interface](../../cookbooks/pulsar-io) for Pulsar IO is, in 
fact, quite similar to that of Pulsar Functions." %}
+
+### Sources and sinks
+
+Pulsar IO connectors come in two types:
+
+* **Sources** feed data *into* Pulsar from other systems. Common sources 
include other messaging systems and "firehose"-style data pipeline APIs.
+* **Sinks** are fed data *from* Pulsar. Common sinks include other messaging 
systems and SQL and NoSQL databases.
+
+This diagram illustrates the relationship between sources, sinks, and Pulsar:
+
+{% include figure.html src="/img/pulsar-io.png" alt="Pulsar IO diagram" 
caption="Pulsar IO connectors (sources and sinks)" width="80" %}
+
+### Working with connectors
+
+Pulsar IO connectors can be managed via the 
[`pulsar-admin`](../../reference/CliTools#pulsar-admin) CLI tool, in particular 
the [`source`](../../reference/CliTools#pulsar-admin-source) and 
[`sink`](../../reference/CliTools#pulsar-admin-sink) commands.
+
+{% include admonition.html type="info" content="For a guide to managing 
connectors in your Pulsar installation, see the [Pulsar IO 
cookbook](../../cookbooks/pulsar-io#managing-connectors)." %}
+
+The following connectors are currently available for Pulsar:
+
+{% include connectors.html %}
+
 ## Schema registry
 
 Type safety is extremely important in any application built around a message 
bus like Pulsar. {% popover Producers %} and {% popover consumers %} need some 
kind of mechanism for coordinating types at the {% popover topic %} level lest 
a wide variety of potential problems arise (for example serialization and 
deserialization issues). Applications typically adopt one of two basic 
approaches to type safety in messaging:
diff --git a/site/img/pulsar-io.png b/site/img/pulsar-io.png
new file mode 100644
index 0000000..3e74d4b
Binary files /dev/null and b/site/img/pulsar-io.png differ

Reply via email to