Hi Junkai,

- Correct. I haven't added any rack-aware information.
- I'm connecting 1 instance at startup and then expanding on demand
(I've set ALLOW_PARTICIPANT_AUTO_JOIN to true).
- I've checked the live instances and other znodes in Zookeeper. Everything
looks ok, except /C8CEPCluster/EXTERNALVIEW/_mm:root:_system:cron2 has
empty `mapFields` while /C8CEPCluster/EXTERNALVIEW/_mm:root:_system:cron3
has `mapFields` with an ONLINE record. I still cannot understand why, or
what I'm doing wrong :(


*[zk: localhost:2181(CONNECTED) 18] get
/C8CEPCluster/CONFIGS/CLUSTER/C8CEPCluster*
{
  "id" : "C8CEPCluster",
  "simpleFields" : {
    "allowParticipantAutoJoin" : "true"
  },
  "mapFields" : {
    "DEFAULT_INSTANCE_CAPACITY_MAP" : {
      "MEMORY" : "100",
      "CPU" : "100"
    },
    "DEFAULT_PARTITION_WEIGHT_MAP" : {
      "MEMORY" : "5",
      "CPU" : "5"
    }
  },
  "listFields" : {
    "INSTANCE_CAPACITY_KEYS" : [ "CPU", "MEMORY" ]
  }
}

*[zk: localhost:2181(CONNECTED) 8] get
/C8CEPCluster/LIVEINSTANCES/c8cep-0.c8cep.c8.svc.cluster.local_12000*
{
  "id" : "c8cep-0.c8cep.c8.svc.cluster.local_12000",
  "simpleFields" : {
    "CURRENT_TASK_THREAD_POOL_SIZE" : "40",
    "HELIX_VERSION" : "1.0.4",
    "LIVE_INSTANCE" : "[email protected]",
    "SESSION_ID" : "106a30539a8003e"
  },
  "mapFields" : { },
  "listFields" : { }
}
[zk: localhost:2181(CONNECTED) 26] get
/C8CEPCluster/CONFIGS/RESOURCE/_mm:root:_system:cron2
{
  "id" : "_mm:root:_system:cron2",
  "simpleFields" : { },
  "mapFields" : {
    "PARTITION_CAPACITY_MAP" : {
      "DEFAULT" : "{\"CPU\":\"10\",\"MEMORY\":\"10\"}"
    }
  },
  "listFields" : { }
}

*[zk: localhost:2181(CONNECTED) 27] get
/C8CEPCluster/CONFIGS/RESOURCE/_mm:root:_system:cron3*
{
  "id" : "_mm:root:_system:cron3",
  "simpleFields" : { },
  "mapFields" : {
    "PARTITION_CAPACITY_MAP" : {
      "DEFAULT" : "{\"CPU\":\"10\",\"MEMORY\":\"10\"}"
    }
  },
  "listFields" : { }
}

*[zk: localhost:2181(CONNECTED) 38] get
/C8CEPCluster/IDEALSTATES/_mm:root:_system:cron2*
{
  "id" : "_mm:root:_system:cron2",
  "simpleFields" : {
    "DELAY_REBALANCE_ENABLED" : "true",
    "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
    "MAX_PARTITIONS_PER_INSTANCE" : "1",
    "NUM_PARTITIONS" : "1",
    "REBALANCER_CLASS_NAME" :
"org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
    "REBALANCE_DELAY" : "10000",
    "REBALANCE_MODE" : "FULL_AUTO",
    "REPLICAS" : "1",
    "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
  },
  "mapFields" : {
    "_mm:root:_system:cron2_0" : { }
  },
  "listFields" : {
    "_mm:root:_system:cron2_0" : [ ]
  }
}

*[zk: localhost:2181(CONNECTED) 39] get
/C8CEPCluster/IDEALSTATES/_mm:root:_system:cron3*
{
  "id" : "_mm:root:_system:cron3",
  "simpleFields" : {
    "DELAY_REBALANCE_ENABLED" : "true",
    "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
    "MAX_PARTITIONS_PER_INSTANCE" : "1",
    "NUM_PARTITIONS" : "1",
    "REBALANCER_CLASS_NAME" :
"org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
    "REBALANCE_DELAY" : "10000",
    "REBALANCE_MODE" : "FULL_AUTO",
    "REPLICAS" : "1",
    "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
  },
  "mapFields" : {
    "_mm:root:_system:cron3_0" : { }
  },
  "listFields" : {
    "_mm:root:_system:cron3_0" : [ ]
  }
}

*[zk: localhost:2181(CONNECTED) 42] get
/C8CEPCluster/EXTERNALVIEW/_mm:root:_system:cron2*
{
  "id" : "_mm:root:_system:cron2",
  "simpleFields" : {
    "BUCKET_SIZE" : "0",
    "DELAY_REBALANCE_ENABLED" : "true",
    "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
    "MAX_PARTITIONS_PER_INSTANCE" : "1",
    "NUM_PARTITIONS" : "1",
    "REBALANCER_CLASS_NAME" :
"org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
    "REBALANCE_DELAY" : "10000",
    "REBALANCE_MODE" : "FULL_AUTO",
    "REPLICAS" : "1",
    "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
  },
  *"mapFields" : { },*
  "listFields" : { }
}

*[zk: localhost:2181(CONNECTED) 43] get
/C8CEPCluster/EXTERNALVIEW/_mm:root:_system:cron3*
{
  "id" : "_mm:root:_system:cron3",
  "simpleFields" : {
    "BUCKET_SIZE" : "0",
    "DELAY_REBALANCE_ENABLED" : "true",
    "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
    "MAX_PARTITIONS_PER_INSTANCE" : "1",
    "NUM_PARTITIONS" : "1",
    "REBALANCER_CLASS_NAME" :
"org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
    "REBALANCE_DELAY" : "10000",
    "REBALANCE_MODE" : "FULL_AUTO",
    "REPLICAS" : "1",
    "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
  },
  *"mapFields" : {*
*    "_mm:root:_system:cron3_0" : {*
*      "c8cep-0.c8cep.c8.svc.cluster.local_12000" : "ONLINE"*
*    }*
*  },*
  "listFields" : { }
}

Thank you.
Grainier Perera.


On Sat, 18 Jun 2022 at 10:45, Junkai Xue <[email protected]> wrote:

> OK. So you don't put any rack-aware information. Then how many instances do
> you have connecting to that cluster? Please double check the live instances
> in Zookeeper as well.
>
> Best,
>
> Junkai
>
> On Fri, Jun 17, 2022 at 10:01 PM Grainier Perera <[email protected]>
> wrote:
>
>> Hi Junkai,
>>
>> I've added the cluster init code to the gist [1]. Apart from that,
>> ClusterConfig is configured like this:
>>
>>             ClusterConfig clusterConfig =
>> configAccessor.getClusterConfig(CLUSTER_NAME);
>>             // Configuring the capacity keys in the Cluster Config. For
>> example, MEMORY.
>>             clusterConfig.setInstanceCapacityKeys(INSTANCE_CAPACITY_KEYS);
>>             // Configuring the instance capacity in the Instance Config.
>> For example, MEMORY = 100.
>>
>> clusterConfig.setDefaultInstanceCapacityMap(INSTANCE_CAPACITY);
>>             // Configuring the partition weight in the Resource Config.
>> For example, MEMORY = 5.
>>
>> clusterConfig.setDefaultPartitionWeightMap(DEFAULT_RESOURCE_USAGE);
>>             configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);
>>
>> [1]
>> https://gist.github.com/grainier/aa1c0b279ea99f88d74c1e94d79f5cdb#file-clustersetup-java
>>
>> Thanks,
>> Grainier Perera.
>>
>>
>> On Sat, 18 Jun 2022 at 10:00, Junkai Xue <[email protected]> wrote:
>>
>>> Could you please share your cluster config as well?
>>>
>>> Best,
>>>
>>> Junkai
>>>
>>> On Fri, Jun 17, 2022 at 8:24 PM Grainier Perera <[email protected]>
>>> wrote:
>>>
>>>> Hi Devs,
>>>>
>>>> I'm trying to add several resources to the cluster using the following
>>>> configurations [1]. However, only some of them become `ONLINE`. What could be
>>>> the reason? Is there a way to guarantee every resource will become `ONLINE`
>>>> if WAGED capacity constraints are met?
>>>>
>>>> As you can see, with the same IdealState, "_mm:root:_system:cron3" has
>>>> mapFields and is ONLINE, while "_mm:root:_system:cron2" has neither.
>>>> Furthermore, I see this behavior more often when the replica count is set
>>>> to 1.
>>>>
>>>> ResourceInfo:
>>>> 1. "_mm:root:_system:cron2"
>>>>
>>>> IdealState for _mm:root:_system:cron2:
>>>> {
>>>>   "id" : "_mm:root:_system:cron2",
>>>>   "simpleFields" : {
>>>>     "DELAY_REBALANCE_ENABLED" : "true",
>>>>     "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
>>>>     "MAX_PARTITIONS_PER_INSTANCE" : "1",
>>>>     "NUM_PARTITIONS" : "1",
>>>>     "REBALANCER_CLASS_NAME" :
>>>> "org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
>>>>     "REBALANCE_DELAY" : "10000",
>>>>     "REBALANCE_MODE" : "FULL_AUTO",
>>>>     "REPLICAS" : "1",
>>>>     "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
>>>>   },
>>>>   "mapFields" : {
>>>>     "_mm:root:_system:cron2_0" : { }
>>>>   },
>>>>   "listFields" : {
>>>>     "_mm:root:_system:cron2_0" : [ ]
>>>>   }
>>>> }
>>>>
>>>>
>>>> ExternalView for _mm:root:_system:cron2:
>>>> {
>>>>   "id" : "_mm:root:_system:cron2",
>>>>   "simpleFields" : {
>>>>     "BUCKET_SIZE" : "0",
>>>>     "DELAY_REBALANCE_ENABLED" : "true",
>>>>     "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
>>>>     "MAX_PARTITIONS_PER_INSTANCE" : "1",
>>>>     "NUM_PARTITIONS" : "1",
>>>>     "REBALANCER_CLASS_NAME" :
>>>> "org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
>>>>     "REBALANCE_DELAY" : "10000",
>>>>     "REBALANCE_MODE" : "FULL_AUTO",
>>>>     "REPLICAS" : "1",
>>>>     "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
>>>>   },
>>>>   *"mapFields" : { },*
>>>>   "listFields" : { }
>>>> }
>>>>
>>>>
>>>> 2. "_mm:root:_system:cron3"
>>>>
>>>> IdealState for _mm:root:_system:cron3:
>>>> {
>>>>   "id" : "_mm:root:_system:cron3",
>>>>   "simpleFields" : {
>>>>     "DELAY_REBALANCE_ENABLED" : "true",
>>>>     "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
>>>>     "MAX_PARTITIONS_PER_INSTANCE" : "1",
>>>>     "NUM_PARTITIONS" : "1",
>>>>     "REBALANCER_CLASS_NAME" :
>>>> "org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
>>>>     "REBALANCE_DELAY" : "10000",
>>>>     "REBALANCE_MODE" : "FULL_AUTO",
>>>>     "REPLICAS" : "1",
>>>>     "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
>>>>   },
>>>>   "mapFields" : {
>>>>     "_mm:root:_system:cron3_0" : { }
>>>>   },
>>>>   "listFields" : {
>>>>     "_mm:root:_system:cron3_0" : [ ]
>>>>   }
>>>> }
>>>>
>>>>
>>>> ExternalView for _mm:root:_system:cron3:
>>>> {
>>>>   "id" : "_mm:root:_system:cron3",
>>>>   "simpleFields" : {
>>>>     "BUCKET_SIZE" : "0",
>>>>     "DELAY_REBALANCE_ENABLED" : "true",
>>>>     "IDEAL_STATE_MODE" : "AUTO_REBALANCE",
>>>>     "MAX_PARTITIONS_PER_INSTANCE" : "1",
>>>>     "NUM_PARTITIONS" : "1",
>>>>     "REBALANCER_CLASS_NAME" :
>>>> "org.apache.helix.controller.rebalancer.waged.WagedRebalancer",
>>>>     "REBALANCE_DELAY" : "10000",
>>>>     "REBALANCE_MODE" : "FULL_AUTO",
>>>>     "REPLICAS" : "1",
>>>>     "STATE_MODEL_DEF_REF" : "C8CEPStateModel"
>>>>   },
>>>>   *"mapFields" : {*
>>>> *    "_mm:root:_system:cron3_0" : {*
>>>> *      "c8cep-0.c8cep.c8.svc.cluster.local_12000" : "ONLINE"*
>>>> *    }*
>>>> *  },*
>>>>   "listFields" : { }
>>>> }
>>>>
>>>>
>>>> [1]: https://gist.github.com/grainier/aa1c0b279ea99f88d74c1e94d79f5cdb
>>>>
>>>> Thank you.
>>>> Grainier Perera.
>>>>
>>>

Reply via email to