sijie closed pull request #2357: [deploy][documentation] Fix terraform-ansible script and update documentation
URL: https://github.com/apache/incubator-pulsar/pull/2357
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:


diff --git a/deployment/terraform-ansible/aws/instances.tf 
b/deployment/terraform-ansible/aws/instances.tf
index 2eb8729298..bc42a0ba58 100644
--- a/deployment/terraform-ansible/aws/instances.tf
+++ b/deployment/terraform-ansible/aws/instances.tf
@@ -30,17 +30,41 @@ resource "aws_instance" "zookeeper" {
   }
 }
 
-resource "aws_instance" "pulsar" {
+resource "aws_instance" "bookie" {
   ami                    = "${var.aws_ami}"
-  instance_type          = "${var.instance_types["pulsar"]}"
+  instance_type          = "${var.instance_types["bookie"]}"
   key_name               = "${aws_key_pair.default.id}"
   subnet_id              = "${aws_subnet.default.id}"
   vpc_security_group_ids = ["${aws_security_group.default.id}"]
-  count                  = "${var.num_pulsar_brokers}"
+  count                  = "${var.num_bookie_nodes}"
 
   tags {
-    Name = "pulsar-${count.index + 1}"
+    Name = "bookie-${count.index + 1}"
   }
+}
 
-  associate_public_ip_address = true
+resource "aws_instance" "broker" {
+  ami                    = "${var.aws_ami}"
+  instance_type          = "${var.instance_types["broker"]}"
+  key_name               = "${aws_key_pair.default.id}"
+  subnet_id              = "${aws_subnet.default.id}"
+  vpc_security_group_ids = ["${aws_security_group.default.id}"]
+  count                  = "${var.num_broker_nodes}"
+
+  tags {
+    Name = "broker-${count.index + 1}"
+  }
+}
+
+resource "aws_instance" "proxy" {
+  ami                    = "${var.aws_ami}"
+  instance_type          = "${var.instance_types["proxy"]}"
+  key_name               = "${aws_key_pair.default.id}"
+  subnet_id              = "${aws_subnet.default.id}"
+  vpc_security_group_ids = ["${aws_security_group.default.id}"]
+  count                  = "${var.num_proxy_nodes}"
+
+  tags {
+    Name = "proxy-${count.index + 1}"
+  }
 }
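With the single `pulsar` instance resource split into `bookie`, `broker`, and `proxy` resources, it is worth previewing the resulting instance counts before applying. A minimal sketch, assuming Terraform has already been initialized in `deployment/terraform-ansible/aws`:

```bash
# Preview the instances Terraform will create; the per-role counts come from
# num_bookie_nodes, num_broker_nodes and num_proxy_nodes in terraform.tfvars.
cd deployment/terraform-ansible/aws
terraform init
terraform plan
```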
diff --git a/deployment/terraform-ansible/aws/network.tf 
b/deployment/terraform-ansible/aws/network.tf
index c78c0dc9f0..97c6624562 100644
--- a/deployment/terraform-ansible/aws/network.tf
+++ b/deployment/terraform-ansible/aws/network.tf
@@ -91,7 +91,7 @@ resource "aws_route" "internet_access" {
 /* Load balancer */
 resource "aws_elb" "default" {
   name            = "pulsar-elb"
-  instances       = ["${aws_instance.pulsar.*.id}"]
+  instances       = ["${aws_instance.proxy.*.id}"]
   security_groups = ["${aws_security_group.elb.id}"]
   subnets         = ["${aws_subnet.default.id}"]
 
diff --git a/deployment/terraform-ansible/aws/output.tf 
b/deployment/terraform-ansible/aws/output.tf
index 784e0caa80..b9a3bb5cd7 100644
--- a/deployment/terraform-ansible/aws/output.tf
+++ b/deployment/terraform-ansible/aws/output.tf
@@ -30,5 +30,5 @@ output "pulsar_web_url" {
 }
 
 output "pulsar_ssh_host" {
-  value = "${aws_instance.pulsar.0.public_ip}"
+  value = "${aws_instance.proxy.0.public_ip}"
 }
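Since `pulsar_ssh_host` now resolves to the first proxy's public IP, both outputs can be read back after an apply with standard Terraform commands:

```bash
# Print the SSH host (first proxy) and the web URL exposed by the load balancer
terraform output pulsar_ssh_host
terraform output pulsar_web_url
```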
diff --git a/deployment/terraform-ansible/aws/setup-disk.yaml 
b/deployment/terraform-ansible/aws/setup-disk.yaml
index e1360c0abe..21e7a6ef3b 100644
--- a/deployment/terraform-ansible/aws/setup-disk.yaml
+++ b/deployment/terraform-ansible/aws/setup-disk.yaml
@@ -18,7 +18,7 @@
 #
 
 - name: Disk setup
-  hosts: pulsar
+  hosts: bookie
   connection: ssh
   become: true
   tasks:
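Because the disk-setup play now targets only the `bookie` group, the resulting mounts can be verified with an ad-hoc Ansible command once it has run. A sketch, assuming the same `terraform-inventory` setup used elsewhere in these docs (the group name comes from the `bookie` Terraform resource):

```bash
# Check that the journal and ledger mounts exist on every bookie
ansible bookie \
  --user='ec2-user' \
  --inventory=`which terraform-inventory` \
  -m shell -a 'df -h /mnt/journal /mnt/storage'
```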
diff --git a/deployment/terraform-ansible/aws/terraform.tfvars 
b/deployment/terraform-ansible/aws/terraform.tfvars
index 7af884b91a..4ea1d6cd1b 100644
--- a/deployment/terraform-ansible/aws/terraform.tfvars
+++ b/deployment/terraform-ansible/aws/terraform.tfvars
@@ -22,10 +22,14 @@ region              = "us-west-2"
 availability_zone   = "us-west-2a"
 aws_ami             = "ami-9fa343e7"
 num_zookeeper_nodes = 3
-num_pulsar_brokers  = 3
+num_bookie_nodes    = 3
+num_broker_nodes    = 2
+num_proxy_nodes     = 1
 base_cidr_block     = "10.0.0.0/16"
 
 instance_types      = {
-  "pulsar"    = "i3.xlarge"
-  "zookeeper" = "t2.small"
+  "zookeeper"   = "t2.small"
+  "bookie"      = "i3.xlarge"
+  "broker"      = "c5.2xlarge"
+  "proxy"       = "c5.2xlarge"
 }
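The new node counts can also be overridden on the command line instead of editing `terraform.tfvars`; command-line `-var` values take precedence over the file. The numbers below are only illustrative:

```bash
# Try a larger broker/proxy tier without touching terraform.tfvars
terraform apply -var 'num_broker_nodes=3' -var 'num_proxy_nodes=2'
```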
diff --git a/deployment/terraform-ansible/aws/variables.tf 
b/deployment/terraform-ansible/aws/variables.tf
index 56a7fae47c..ec98b690d4 100644
--- a/deployment/terraform-ansible/aws/variables.tf
+++ b/deployment/terraform-ansible/aws/variables.tf
@@ -49,8 +49,16 @@ variable "num_zookeeper_nodes" {
   description = "The number of EC2 instances running ZooKeeper"
 }
 
-variable "num_pulsar_brokers" {
-  description = "The number of EC2 instances running a Pulsar broker plus a 
BookKeeper bookie"
+variable "num_bookie_nodes" {
+  description = "The number of EC2 instances running BookKeeper"
+}
+
+variable "num_broker_nodes" {
+  description = "The number of EC2 instances running Pulsar brokers"
+}
+
+variable "num_proxy_nodes" {
+  description = "The number of EC2 instances running Pulsar proxies"
 }
 
 variable "instance_types" {
diff --git a/deployment/terraform-ansible/deploy-pulsar.yaml 
b/deployment/terraform-ansible/deploy-pulsar.yaml
index d586a5e1e0..49ea536067 100644
--- a/deployment/terraform-ansible/deploy-pulsar.yaml
+++ b/deployment/terraform-ansible/deploy-pulsar.yaml
@@ -18,7 +18,7 @@
 #
 
 - name: Pulsar setup
-  hosts: all
+  hosts: zookeeper, bookie, broker, proxy
   connection: ssh
   become: true
   tasks:
@@ -36,10 +36,9 @@
         - vim
     - set_fact:
         zookeeper_servers: "{{ groups['zookeeper']|map('extract', hostvars, 
['ansible_default_ipv4', 'address'])|map('regex_replace', '(.*)', '\\1:2181') | 
join(',') }}"
-        service_url: "pulsar://{{ hostvars[groups['pulsar'][0]].public_ip 
}}:6650/"
-        http_url: "http://{{ hostvars[groups['pulsar'][0]].public_ip }}:8080/"
+        service_url: "pulsar://{{ hostvars[groups['proxy'][0]].public_ip 
}}:6650/"
+        http_url: "http://{{ hostvars[groups['proxy'][0]].public_ip }}:8080/"
         pulsar_version: "2.1.0-incubating"
-
     - name: Download Pulsar binary package
       unarchive:
         src: http://archive.apache.org/dist/incubator/pulsar/pulsar-{{ 
pulsar_version }}/apache-pulsar-{{ pulsar_version }}-bin.tar.gz
@@ -47,8 +46,9 @@
         dest: /opt/pulsar
         extra_opts: ["--strip-components=1"]
     - set_fact:
-        max_heap_memory: "24g"
-        max_direct_memory: "24g"
+        max_heap_memory: "12g"
+        max_direct_memory: "12g"
+        cluster_name: "local"
     - name: Add pulsar_env.sh configuration file
       template:
         src: "../templates/pulsar_env.sh"
@@ -63,7 +63,6 @@
         zid: "{{ groups['zookeeper'].index(inventory_hostname) }}"
         max_heap_memory: "512m"
         max_direct_memory: "1g"
-        cluster_name: "local"
     - name: Create ZooKeeper data directory
       file:
         path: "/opt/pulsar/{{ item }}"
@@ -103,54 +102,79 @@
         chdir: /opt/pulsar
       when: groups['zookeeper'][0] == inventory_hostname
 
-- name: Set up Bookkeeper
-  hosts: pulsar
+- name: Set up bookies
+  hosts: bookie
   connection: ssh
   become: true
   tasks:
     - template:
         src: "../templates/bookkeeper.conf"
         dest: "/opt/pulsar/conf/bookkeeper.conf"
+    - template:
+        src: "../templates/bookkeeper.service"
+        dest: "/etc/systemd/system/bookkeeper.service"
+    - systemd:
+        state: restarted
+        daemon_reload: yes
+        name: "bookkeeper"
 
-- name: Set up Pulsar
-  hosts: pulsar
+- name: Set up brokers
+  hosts: broker
   connection: ssh
   become: true
   tasks:
+    - name: Download Pulsar IO package
+      unarchive:
+        src: http://archive.apache.org/dist/incubator/pulsar/pulsar-{{ 
pulsar_version }}/apache-pulsar-io-connectors-{{ pulsar_version }}-bin.tar.gz
+        remote_src: yes
+        dest: /opt/pulsar
+        extra_opts: ["--strip-components=1"]
     - name: Set up broker
       template:
         src: "../templates/broker.conf"
         dest: "/opt/pulsar/conf/broker.conf"
+    - name: Set up function worker
+      template:
+        src: "../templates/functions_worker.yml"
+        dest: "/opt/pulsar/conf/functions_worker.yml"
     - template:
-        src: "../templates/pulsar.service"
-        dest: "/etc/systemd/system/pulsar.service"
+        src: "../templates/pulsar.broker.service"
+        dest: "/etc/systemd/system/pulsar.broker.service"
     - systemd:
         state: restarted
         daemon_reload: yes
-        name: "pulsar"
+        name: "pulsar.broker"
 
-- name: Pulsar multi-tenancy setup
-  hosts: pulsar
+- name: Set up Pulsar Proxy
+  hosts: proxy
   connection: ssh
   become: true
   tasks:
-    - name: Create default property and namespace
-      shell: |
-        bin/pulsar-admin tenants create public \
-          --allowed-clusters local \
-          --admin-roles all
-        bin/pulsar-admin namespaces create public/local/default
-      args:
-        chdir: /opt/pulsar
-      when: groups['zookeeper'][0] == inventory_hostname
+    - name: Set up proxy
+      template:
+        src: "../templates/proxy.conf"
+        dest: "/opt/pulsar/conf/proxy.conf"
+    - template:
+        src: "../templates/pulsar.proxy.service"
+        dest: "/etc/systemd/system/pulsar.proxy.service"
+    - systemd:
+        state: restarted
+        daemon_reload: yes
+        name: "pulsar.proxy"
 
 - name:  Hosts addresses
   hosts: localhost
   become: false
   tasks:
     - debug:
-        msg: "Zookeeper servers {{ item }}"
+        msg: "Zookeeper Server {{ item }}"
       with_items: "{{ groups['zookeeper'] }}"
     - debug:
-        msg: "Pulsar/BookKeeper servers {{ item }}"
-      with_items: "{{ groups['pulsar'] }}"
+        msg: "Bookie {{ item }}"
+      with_items: "{{ groups['bookie'] }}"
+    - debug:
+        msg: "Broker {{ item }}"
+      with_items: "{{ groups['broker'] }}"
+    - debug:
+        msg: "Proxy {{ item }}"
+      with_items: "{{ groups['proxy'] }}"
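With the plays keyed to the `zookeeper`, `bookie`, `broker`, and `proxy` host groups (as generated by `terraform-inventory` from the resource names), a single role can be re-run without touching the others. A hedged example, reusing the inventory invocation from the docs:

```bash
# Re-run the playbook for the proxy hosts only; other groups are skipped
ansible-playbook \
  --user='ec2-user' \
  --inventory=`which terraform-inventory` \
  --limit proxy \
  ../deploy-pulsar.yaml
```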
diff --git a/deployment/terraform-ansible/templates/bookkeeper.service 
b/deployment/terraform-ansible/templates/bookkeeper.service
new file mode 100644
index 0000000000..7ec850542a
--- /dev/null
+++ b/deployment/terraform-ansible/templates/bookkeeper.service
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+[Unit]
+Description=BookKeeper
+After=network.target
+
+[Service]
+ExecStart=/opt/pulsar/bin/pulsar bookie
+WorkingDirectory=/opt/pulsar
+RestartSec=1s
+Restart=on-failure
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
+
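Once Ansible has installed this unit and restarted it, the bookie can be checked with the usual systemd tooling on a bookie node:

```bash
# Confirm the bookie service is running, then tail its logs
sudo systemctl status bookkeeper
sudo journalctl -u bookkeeper -f
```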
diff --git a/deployment/terraform-ansible/templates/broker.conf 
b/deployment/terraform-ansible/templates/broker.conf
index 73d4e2f524..da19f24f80 100644
--- a/deployment/terraform-ansible/templates/broker.conf
+++ b/deployment/terraform-ansible/templates/broker.conf
@@ -41,10 +41,10 @@ webServicePortTls=8443
 bindAddress=0.0.0.0
 
 # Hostname or IP address the service advertises to the outside world. If not 
set, the value of InetAddress.getLocalHost().getHostName() is used.
-advertisedAddress={{ hostvars[inventory_hostname].public_ip }}
+advertisedAddress={{ hostvars[inventory_hostname].private_ip }}
 
 # Name of the cluster to which this broker belongs to
-clusterName=local
+clusterName={{ cluster_name }}
 
 # Enable cluster's failure-domain which can distribute brokers into logical 
region
 failureDomainsEnabled=false
@@ -468,7 +468,7 @@ exposeTopicLevelMetricsInPrometheus=true
 ### --- Functions --- ###
 
 # Enable Functions Worker Service in Broker
-functionsWorkerEnabled=false
+functionsWorkerEnabled=true
 
 ### --- Broker Web Stats --- ###
 
diff --git a/deployment/terraform-ansible/templates/functions_worker.yml 
b/deployment/terraform-ansible/templates/functions_worker.yml
new file mode 100644
index 0000000000..100d467e7c
--- /dev/null
+++ b/deployment/terraform-ansible/templates/functions_worker.yml
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+workerId: standalone
+workerHostname: localhost
+workerPort: 6750
+
+connectorsDirectory: ./connectors
+
+functionMetadataTopicName: metadata
+clusterCoordinationTopicName: coordinate
+pulsarFunctionsNamespace: public/functions
+pulsarFunctionsCluster: {{ cluster_name }}
+pulsarServiceUrl: pulsar://localhost:6650
+pulsarWebServiceUrl: http://localhost:8080
+numFunctionPackageReplicas: 1
+downloadDirectory: /tmp/pulsar_functions
+#threadContainerFactory:
+#  threadGroupName: "Thread Function Container Group"
+processContainerFactory:
+  logDirectory:
+
+schedulerClassName: 
"org.apache.pulsar.functions.worker.scheduler.RoundRobinScheduler"
+functionAssignmentTopicName: "assignments"
+failureCheckFreqMs: 30000
+rescheduleTimeoutMs: 60000
+initialBrokerReconnectMaxRetries: 60
+assignmentWriteMaxRetries: 60
+instanceLivenessCheckFreqMs: 30000
+metricsSamplingPeriodSec: 60
diff --git a/deployment/terraform-ansible/templates/proxy.conf 
b/deployment/terraform-ansible/templates/proxy.conf
new file mode 100644
index 0000000000..21fa327f9b
--- /dev/null
+++ b/deployment/terraform-ansible/templates/proxy.conf
@@ -0,0 +1,123 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# The ZooKeeper quorum connection string (as a comma-separated list)
+zookeeperServers={{ zookeeper_servers }}
+
+# Configuration store connection string (as a comma-separated list)
+configurationStoreServers={{ zookeeper_servers }}
+
+# ZooKeeper session timeout (in milliseconds)
+zookeeperSessionTimeoutMs=30000
+
+# The port to use to serve binary Protobuf requests
+servicePort=6650
+
+# The port to use to serve binary Protobuf TLS requests
+servicePortTls=6651
+
+# Port that the discovery service listens on
+webServicePort=8080
+
+# Port to use to serve HTTPS requests
+webServicePortTls=8443
+
+# Path for the file used to determine the rotation status for the proxy 
instance when responding
+# to service discovery health checks
+statusFilePath=
+
+### --- Authentication --- ###
+
+# Whether authentication is enabled for the Pulsar proxy
+authenticationEnabled=false
+
+# Authentication provider name list (a comma-separated list of class names)
+authenticationProviders=
+
+# Whether authorization is enforced by the Pulsar proxy
+authorizationEnabled=false
+
+# Authorization provider as a fully qualified class name
+authorizationProvider=org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider
+
+# The three brokerClient* authentication settings below are for the proxy 
itself and determine how it
+# authenticates with Pulsar brokers
+
+# The authentication plugin used by the Pulsar proxy to authenticate with 
Pulsar brokers
+brokerClientAuthenticationPlugin=
+
+# The authentication parameters used by the Pulsar proxy to authenticate with 
Pulsar brokers
+brokerClientAuthenticationParameters=
+
+# The path to trusted certificates used by the Pulsar proxy to authenticate 
with Pulsar brokers
+brokerClientTrustCertsFilePath=
+
+# Role names that are treated as "super-users," meaning that they will be able 
to perform all admin
+# operations and publish/consume to/from all topics (as a comma-separated list)
+superUserRoles=
+
+# Whether client authorization credentials are forwarded to the broker for re-authorization.
+# Authentication must be enabled via authenticationEnabled=true for this to 
take effect.
+forwardAuthorizationCredentials=false
+
+# --- RateLimiting ----
+# Max concurrent inbound connections. The proxy will reject requests beyond 
that.
+maxConcurrentInboundConnections=10000
+
+# Max concurrent outbound connections. The proxy will error out requests 
beyond that.
+maxConcurrentLookupRequests=10000
+
+##### --- TLS --- #####
+
+# Whether TLS is enabled for the proxy
+tlsEnabledInProxy=false
+
+# Whether TLS is enabled when communicating with Pulsar brokers
+tlsEnabledWithBroker=false
+
+# Path for the TLS certificate file
+tlsCertificateFilePath=
+
+# Path for the TLS private key file
+tlsKeyFilePath=
+
+# Path for the trusted TLS certificate file.
+# This cert is used to verify that any certs presented by connecting clients
+# are signed by a certificate authority. If this verification
+# fails, then the certs are untrusted and the connections are dropped.
+tlsTrustCertsFilePath=
+
+# Accept untrusted TLS certificate from client.
+# If true, a client with a cert which cannot be verified with the
+# 'tlsTrustCertsFilePath' cert will be allowed to connect to the server,
+# though the cert will not be used for client authentication.
+tlsAllowInsecureConnection=false
+
+# Whether the hostname is validated when the proxy creates a TLS connection 
with brokers
+tlsHostnameVerificationEnabled=false
+
+# Whether client certificates are required for TLS. Connections are rejected 
if the client
+# certificate isn't trusted.
+tlsRequireTrustedClientCertOnConnect=false
+
+
+### --- Deprecated config variables --- ###
+
+# Deprecated. Use configurationStoreServers
+globalZookeeperServers=
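With the proxies sitting in front of the brokers (and behind the ELB), a basic reachability check can go through the proxy's web service port rather than a broker directly. A sketch, assuming Pulsar is installed under `/opt/pulsar` on the machine you run it from; `<proxy-or-elb-address>` is a placeholder for the load balancer DNS name or a proxy public IP:

```bash
# List clusters through the proxy's web service port (8080)
/opt/pulsar/bin/pulsar-admin --admin-url http://<proxy-or-elb-address>:8080 clusters list
```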
diff --git a/deployment/terraform-ansible/templates/pulsar.service 
b/deployment/terraform-ansible/templates/pulsar.broker.service
similarity index 92%
rename from deployment/terraform-ansible/templates/pulsar.service
rename to deployment/terraform-ansible/templates/pulsar.broker.service
index e4e02d3077..a30b01224c 100644
--- a/deployment/terraform-ansible/templates/pulsar.service
+++ b/deployment/terraform-ansible/templates/pulsar.broker.service
@@ -22,7 +22,7 @@ Description=Pulsar Broker
 After=network.target
 
 [Service]
-ExecStart=/opt/pulsar/bin/pulsar broker --run-bookie --run-bookie-autorecovery
+ExecStart=/opt/pulsar/bin/pulsar broker
 WorkingDirectory=/opt/pulsar
 RestartSec=1s
 Restart=on-failure
diff --git a/deployment/terraform-ansible/templates/pulsar.proxy.service 
b/deployment/terraform-ansible/templates/pulsar.proxy.service
new file mode 100644
index 0000000000..40bf37c720
--- /dev/null
+++ b/deployment/terraform-ansible/templates/pulsar.proxy.service
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+[Unit]
+Description=Pulsar Proxy
+After=network.target
+
+[Service]
+ExecStart=/opt/pulsar/bin/pulsar proxy
+WorkingDirectory=/opt/pulsar
+RestartSec=1s
+Restart=on-failure
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
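The playbook only restarts these units; if they should also come back after a node reboot, they can be enabled as well. An optional sketch, run on the node for each role:

```bash
# Have systemd start the role's unit at boot
sudo systemctl enable bookkeeper        # on bookie nodes
sudo systemctl enable pulsar.broker     # on broker nodes
sudo systemctl enable pulsar.proxy      # on proxy nodes
```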
diff --git a/site2/docs/deploy-aws.md b/site2/docs/deploy-aws.md
index f827177457..5f3de01bc6 100644
--- a/site2/docs/deploy-aws.md
+++ b/site2/docs/deploy-aws.md
@@ -110,17 +110,21 @@ Variable name | Description | Default
 `availability_zone` | The AWS availability zone in which the Pulsar cluster 
will run | `us-west-2a`
 `aws_ami` | The [Amazon Machine 
Image](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) (AMI) that 
will be used by the cluster | `ami-9fa343e7`
 `num_zookeeper_nodes` | The number of 
[ZooKeeper](https://zookeeper.apache.org) nodes in the ZooKeeper cluster | 3
-`num_pulsar_brokers` | The number of Pulsar brokers and BookKeeper bookies 
that will run in the cluster | 3
+`num_bookie_nodes` | The number of bookies that will run in the cluster | 3
+`num_broker_nodes` | The number of Pulsar brokers that will run in the cluster 
| 2
+`num_proxy_nodes` | The number of Pulsar proxies that will run in the cluster 
| 1
 `base_cidr_block` | The root 
[CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that will 
be used by network assets for the cluster | `10.0.0.0/16`
-`instance_types` | The EC2 instance types to be used. This variable is a map 
with two keys: `zookeeper` for the ZooKeeper instances and `pulsar` for the 
Pulsar brokers and BookKeeper bookies | `t2.small` (ZooKeeper) and `i3.xlarge` 
(Pulsar/BookKeeper)
+`instance_types` | The EC2 instance types to be used. This variable is a map with four keys: `zookeeper` for the ZooKeeper instances, `bookie` for the BookKeeper bookies, and `broker` and `proxy` for the Pulsar brokers and proxies | `t2.small` (ZooKeeper), `i3.xlarge` (BookKeeper) and `c5.2xlarge` (Brokers/Proxies)
 
 ### What is installed
 
 When you run the Ansible playbook, the following AWS resources will be used:
 
-* 6 total [Elastic Compute Cloud](https://aws.amazon.com/ec2) (EC2) instances 
running the [ami-9fa343e7](https://access.redhat.com/articles/3135091) Amazon 
Machine Image (AMI), which runs [Red Hat Enterprise Linux (RHEL) 
7.4](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/7.4_release_notes/index).
 By default, that includes:
+* 9 total [Elastic Compute Cloud](https://aws.amazon.com/ec2) (EC2) instances 
running the [ami-9fa343e7](https://access.redhat.com/articles/3135091) Amazon 
Machine Image (AMI), which runs [Red Hat Enterprise Linux (RHEL) 
7.4](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/7.4_release_notes/index).
 By default, that includes:
   * 3 small VMs for ZooKeeper 
([t2.small](https://www.ec2instances.info/?selected=t2.small) instances)
-  * 3 larger VMs for Pulsar [brokers](reference-terminology.md#broker) and 
BookKeeper [bookies](reference-terminology.md#bookie) 
([i3.4xlarge](https://www.ec2instances.info/?selected=i3.4xlarge) instances)
+  * 3 larger VMs for BookKeeper [bookies](reference-terminology.md#bookie) 
([i3.xlarge](https://www.ec2instances.info/?selected=i3.xlarge) instances)
+  * 2 larger VMs for Pulsar [brokers](reference-terminology.md#broker) 
([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instances)
+  * 1 larger VM for the Pulsar [proxy](reference-terminology.md#proxy) ([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instance)
 * An EC2 [security 
group](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-network-security.html)
 * A [virtual private cloud](https://aws.amazon.com/vpc/) (VPC) for security
 * An [API Gateway](https://aws.amazon.com/api-gateway/) for connections from 
the outside world
@@ -187,6 +191,7 @@ If you've created a private SSH key at a location different 
from `~/.ssh/id_rsa`
 $ ansible-playbook \
   --user='ec2-user' \
   --inventory=`which terraform-inventory` \
+  --private-key="~/.ssh/some-non-default-key" \
   ../deploy-pulsar.yaml
 ```
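After the playbook finishes it prints a service URL pointing at the first proxy; a quick produce/consume round trip against that URL is one way to confirm the cluster end to end. A sketch, with `<proxy-public-ip>` as a placeholder and Pulsar assumed to be unpacked under `/opt/pulsar` on the machine running the test:

```bash
# Consume in one terminal...
/opt/pulsar/bin/pulsar-client --url pulsar://<proxy-public-ip>:6650 consume my-topic -s smoke-test -n 1
# ...and produce from another
/opt/pulsar/bin/pulsar-client --url pulsar://<proxy-public-ip>:6650 produce my-topic --messages "hello"
```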
 
diff --git 
a/site2/website/versioned_docs/version-2.1.0-incubating/deploy-aws.md 
b/site2/website/versioned_docs/version-2.1.0-incubating/deploy-aws.md
index 4c92a3c365..01ef6d1988 100644
--- a/site2/website/versioned_docs/version-2.1.0-incubating/deploy-aws.md
+++ b/site2/website/versioned_docs/version-2.1.0-incubating/deploy-aws.md
@@ -42,6 +42,24 @@ $ cd incubator-pulsar/deployment/terraform-ansible/aws
 
 ## SSH setup
 
+> If you already have an SSH key and would like to use it, you can skip generating the SSH keys and instead update the `private_key_file` setting
+> in the `ansible.cfg` file and the `public_key_path` setting in the `terraform.tfvars` file.
+>
+> For example, if you already have a private SSH key in `~/.ssh/pulsar_aws` and a public key in `~/.ssh/pulsar_aws.pub`,
+> you can do the following:
+>
+> 1. Update `ansible.cfg` with the following value:
+>
+> ```shell
+> private_key_file=~/.ssh/pulsar_aws
+> ```
+>
+> 2. Update `terraform.tfvars` with the following value:
+>
+> ```shell
+> public_key_path=~/.ssh/pulsar_aws.pub
+> ```
+
 In order to create the necessary AWS resources using Terraform, you'll need to 
create an SSH key. To create a private SSH key in `~/.ssh/id_rsa` and a public 
key in `~/.ssh/id_rsa.pub`:
 
 ```bash
@@ -93,17 +111,21 @@ Variable name | Description | Default
 `availability_zone` | The AWS availability zone in which the Pulsar cluster 
will run | `us-west-2a`
 `aws_ami` | The [Amazon Machine 
Image](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) (AMI) that 
will be used by the cluster | `ami-9fa343e7`
 `num_zookeeper_nodes` | The number of 
[ZooKeeper](https://zookeeper.apache.org) nodes in the ZooKeeper cluster | 3
-`num_pulsar_brokers` | The number of Pulsar brokers and BookKeeper bookies 
that will run in the cluster | 3
+`num_bookie_nodes` | The number of bookies that will run in the cluster | 3
+`num_broker_nodes` | The number of Pulsar brokers that will run in the cluster 
| 2
+`num_proxy_nodes` | The number of Pulsar proxies that will run in the cluster 
| 1
 `base_cidr_block` | The root 
[CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that will 
be used by network assets for the cluster | `10.0.0.0/16`
-`instance_types` | The EC2 instance types to be used. This variable is a map 
with two keys: `zookeeper` for the ZooKeeper instances and `pulsar` for the 
Pulsar brokers and BookKeeper bookies | `t2.small` (ZooKeeper) and `i3.xlarge` 
(Pulsar/BookKeeper)
+`instance_types` | The EC2 instance types to be used. This variable is a map with four keys: `zookeeper` for the ZooKeeper instances, `bookie` for the BookKeeper bookies, and `broker` and `proxy` for the Pulsar brokers and proxies | `t2.small` (ZooKeeper), `i3.xlarge` (BookKeeper) and `c5.2xlarge` (Brokers/Proxies)
 
 ### What is installed
 
 When you run the Ansible playbook, the following AWS resources will be used:
 
-* 6 total [Elastic Compute Cloud](https://aws.amazon.com/ec2) (EC2) instances 
running the [ami-9fa343e7](https://access.redhat.com/articles/3135091) Amazon 
Machine Image (AMI), which runs [Red Hat Enterprise Linux (RHEL) 
7.4](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/7.4_release_notes/index).
 By default, that includes:
+* 9 total [Elastic Compute Cloud](https://aws.amazon.com/ec2) (EC2) instances 
running the [ami-9fa343e7](https://access.redhat.com/articles/3135091) Amazon 
Machine Image (AMI), which runs [Red Hat Enterprise Linux (RHEL) 
7.4](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/7.4_release_notes/index).
 By default, that includes:
   * 3 small VMs for ZooKeeper 
([t2.small](https://www.ec2instances.info/?selected=t2.small) instances)
-  * 3 larger VMs for Pulsar [brokers](reference-terminology.md#broker) and 
BookKeeper [bookies](reference-terminology.md#bookie) 
([i3.4xlarge](https://www.ec2instances.info/?selected=i3.4xlarge) instances)
+  * 3 larger VMs for BookKeeper [bookies](reference-terminology.md#bookie) 
([i3.xlarge](https://www.ec2instances.info/?selected=i3.xlarge) instances)
+  * 2 larger VMs for Pulsar [brokers](reference-terminology.md#broker) 
([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instances)
+  * 1 larger VM for the Pulsar [proxy](reference-terminology.md#proxy) ([c5.2xlarge](https://www.ec2instances.info/?selected=c5.2xlarge) instance)
 * An EC2 [security 
group](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-network-security.html)
 * A [virtual private cloud](https://aws.amazon.com/vpc/) (VPC) for security
 * An [API Gateway](https://aws.amazon.com/api-gateway/) for connections from 
the outside world
@@ -134,6 +156,25 @@ At any point, you can destroy all AWS resources associated 
with your cluster usi
 $ terraform destroy
 ```
 
+## Setup Disks
+
+Before you run the Pulsar playbook, you need to mount the disks to the correct directories on the bookie nodes.
+Since different machine types have different disk layouts, if you change `instance_types` in your Terraform
+config, you need to update the task defined in the `setup-disk.yaml` file.
+
+To set up the disks on the bookie nodes, use this command:
+
+```bash
+$ ansible-playbook \
+  --user='ec2-user' \
+  --inventory=`which terraform-inventory` \
+  setup-disk.yaml
+```
+
+After running this command, the disks will be mounted under `/mnt/journal` (the journal disk) and `/mnt/storage` (the ledger disk).
+It is important to run this command only once. If you run it again after the Pulsar playbook has already been run,
+it might erase the disks and cause the bookies to fail to start up.
+
 ## Running the Pulsar playbook
 
 Once you've created the necessary AWS resources using Terraform, you can 
install and run Pulsar on the Terraform-created EC2 instances using Ansible. To 
do so, use this command:


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
