Continuing with this thread. I ended up just deleting the database and
recreating it, and the problem went away. I'm not sure why.
Nevertheless, I am now using the following
default-distributed-db-config.json:
{
"replication": true,
"autoDeploy": true,
"hotAlignment": false,
"resyncEvery": 15,
"clusters": {
"internal": {
"replication": false
},
"index": {
"replication": false
},
"*": {
"replication": true,
"readQuorum": 1,
"writeQuorum": 1,
"failureAvailableNodesLessQuorum": false,
"readYourWrites": true,
"partitioning": {
"strategy": "round-robin",
"default": 0,
"partitions": [
[ "<NEW_NODE>" ]
]
}
}
}
}
However, I noticed that now the following warning appears in the logs on
each cluster node:
WARNING readQuorum setting not found for cluster=[class name]_[node name]
in distributed-config.json
Why would this warning appear? Is it something that will eventually
compromise data integrity? Does anyone have any ideas about this? Thanks.
Amir.
On Tuesday, March 24, 2015 at 1:13:31 PM UTC-5, Amir Khawaja wrote:
>
> Please find the contents of the distributed-config.json file below:
>
>
> {"@type":"d","@version":0,"version":58,"replication":true,"autoDeploy":true,"hotAlignment":false,"resyncEvery":15,"clusters":{"@type":"d","@version":0,"internal":{"@type":"d","@version":0,"replication":false},"index":{"@type":"d","@version":0,"replication":false},"*":{"@type":"d","@version":0,"replication":true,"readQuorum":1,"writeQuorum":1,"failureAvailableNodesLessQuorum":false,"readYourWrites":true,"servers":["odb01ue2","odb02ue2","odb01uw","odb02uw","<NEW_NODE>"]},"triggered_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"visitor_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"visitortrait_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"v_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"event_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"has_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"eventtrait_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"orole_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"license_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"_studio_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"customer_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"orids_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"oschedule_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"foreignidentifier_odb02ue2":{"@type":"d","@version":0,"se
rvers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"e_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"ouser_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"ofunction_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"belongsto_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"belongsto_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"_studio_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"orids_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"eventtrait_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"v_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"visitor_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"ouser_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"triggered_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"oschedule_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"e_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"ofunction_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"orole_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"visitortrait_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"license_odb01uw":{"@type":"d","@
version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"foreignidentifier_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"event_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"customer_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"has_odb01uw":{"@type":"d","@version":0,"servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"_studio_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"customer_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"orids_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"visitortrait_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"license_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"event_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"triggered_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"v_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"orole_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"foreignidentifier_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"has_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"eventtrait_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"visitor_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"ofunct
ion_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"belongsto_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"e_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"oschedule_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"ouser_odb02uw":{"@type":"d","@version":0,"servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}}}
>
>
> Amir.
>
>
>
> On Tuesday, March 24, 2015 at 1:02:46 PM UTC-5, Colin wrote:
>>
>> For some reason it's trying to reach a quorum of 4.
>>
>> Could you paste your database's distributed-config.json file please?
>>
>> -Colin
>>
>> On Tuesday, March 24, 2015 at 12:40:15 PM UTC-5, Amir Khawaja wrote:
>>>
>>> The cluster is now online in US East2 and US West. I did the following:
>>>
>>> - Changed the default-distributed-db-config.json to:
>>>
>>> {
>>> "replication": true,
>>> "autoDeploy": true,
>>> "hotAlignment": false,
>>> "resyncEvery": 15,
>>> "clusters": {
>>> "internal": {
>>> "replication": false
>>> },
>>> "index": {
>>> "replication": false
>>> },
>>> "*": {
>>> "replication": true,
>>> "readQuorum": 1,
>>> "writeQuorum": 1,
>>> "failureAvailableNodesLessQuorum": false,
>>> "readYourWrites": true,
>>> "partitioning": {
>>> "strategy": "round-robin",
>>> "default": 0,
>>> "partitions": [
>>> [ "<NEW_NODE>" ]
>>> ]
>>> }
>>> }
>>> }
>>> }
>>>
>>> - Deleted the distributed-config.json file from each database folder and
>>> restarted each node in the cluster.
>>>
>>> Now, when I connect to one of the nodes and try to delete a vertex, I
>>> receive the following error:
>>>
>>> com.orientechnologies.orient.server.distributed.ODistributedException:
>>> Error on executing distributed request (id=141
>>> from=odb02uw task=command_sql(delete vertex #42:2) userName=) against
>>> database 'vis.[]' to nodes [odb02ue2, odb02uw,
>>> odb01uw, odb01ue2] -->
>>> com.orientechnologies.orient.server.distributed.ODistributedException:
>>> Quorum 4 not reached for
>>> request (id=141 from=odb02uw task=command_sql(delete vertex #42:2)
>>> userName=). Timeout=407ms Servers in timeout/
>>> conflict are: - odb02ue2:
>>> com.orientechnologies.orient.core.exception.OCommandExecutionException:
>>> Error on execution
>>> of command: sql.delete vertex #42:2 - odb01ue2:
>>> com.orientechnologies.orient.core.exception.
>>> OCommandExecutionException: Error on execution of command: sql.delete
>>> vertex #42:2 - odb01uw: com.orientechnologies.
>>> orient.core.exception.OCommandExecutionException: Error on execution of
>>> command: sql.delete vertex #42:2 Received:
>>> {odb02uw=com.orientechnologies.orient.core.exception.OCommandExecutionException:
>>>
>>> Error on execution of command: sql.
>>> delete vertex #42:2,
>>> odb01uw=com.orientechnologies.orient.core.exception.OCommandExecutionException:
>>>
>>> Error on
>>> execution of command: sql.delete vertex #42:2,
>>> odb02ue2=com.orientechnologies.orient.core.exception.
>>> OCommandExecutionException: Error on execution of command: sql.delete
>>> vertex #42:2, odb01ue2=com.orientechnologies.
>>> orient.core.exception.OCommandExecutionException: Error on execution of
>>> command: sql.delete vertex #42:2}
>>>
>>> Why am I not able to delete a vertex?
>>>
>>> Amir.
>>>
>>>
>>> On Tuesday, March 24, 2015 at 12:20:37 PM UTC-5, Colin wrote:
>>>>
>>>> That latency should be fine so long as it's consistent.
>>>>
>>>> -Colin
>>>>
>>>> On Tuesday, March 24, 2015 at 11:52:58 AM UTC-5, Amir Khawaja wrote:
>>>>>
>>>>> Hi Colin,
>>>>>
>>>>> I checked the latency prior to posting and between regions it is about
>>>>> 65ms on average. What should I set the latency to for Hazelcast?
>>>>>
>>>>> Amir.
>>>>>
>>>>> On Tuesday, March 24, 2015 at 11:49:25 AM UTC-5, Colin wrote:
>>>>>>
>>>>>> Hi Amir,
>>>>>>
>>>>>> You might also do a ping and a traceroute between the machines and
>>>>>> see what kind of latency you're getting, just in case it's a timeout
>>>>>> issue
>>>>>> with Hazelcast.
>>>>>>
>>>>>> -Colin
>>>>>>
>>>>>> On Tuesday, March 24, 2015 at 11:32:21 AM UTC-5, Amir Khawaja wrote:
>>>>>>>
>>>>>>> Hi Colin,
>>>>>>>
>>>>>>> Thank you for the prompt response.
>>>>>>>
>>>>>>> I'm a little confused as you say "the US West node will not come
>>>>>>>> online telling me that the database is not yet online. At that point,
>>>>>>>> I
>>>>>>>> kill the process and then eventually the database comes online."
>>>>>>>
>>>>>>> Do you mean you kill the database process and then restart it and
>>>>>>>> then it starts communicating?
>>>>>>>
>>>>>>>
>>>>>>> Yes. I kill the database process on the cluster node where
>>>>>>> OrientDB is not coming online.
>>>>>>>
>>>>>>> Can you see on each machine when Hazelcast 'sees' all the members?
>>>>>>>> Are all the members showing up?
>>>>>>>
>>>>>>>
>>>>>>> Yes. I see the databases are talking to each other as the IP address
>>>>>>> of the nodes show up in the log of each database server.
>>>>>>>
>>>>>>> I will try setting hotAlignment to false and report my results on
>>>>>>> this thread.
>>>>>>>
>>>>>>> Amir.
>>>>>>>
>>>>>>>
>>>>>>> On Tuesday, March 24, 2015 at 11:25:16 AM UTC-5, Colin wrote:
>>>>>>>>
>>>>>>>> Hi Amir,
>>>>>>>>
>>>>>>>> Is it consistently a problem between the same machines not seeing
>>>>>>>> each other?
>>>>>>>>
>>>>>>>> I'm a little confused as you say "the US West node will not come
>>>>>>>> online telling me that the database is not yet online. At that point,
>>>>>>>> I
>>>>>>>> kill the process and then eventually the database comes online."
>>>>>>>>
>>>>>>>> Do you mean you kill the database process and then restart it and
>>>>>>>> then it starts communicating?
>>>>>>>>
>>>>>>>> In your distributed json file, try setting "hotAlignment" to false.
>>>>>>>>
>>>>>>>> Can you see on each machine when Hazelcast 'sees' all the members?
>>>>>>>> Are all the members showing up?
>>>>>>>>
>>>>>>>> -Colin
>>>>>>>>
>>>>>>>> Orient Technologies
>>>>>>>>
>>>>>>>> The Company behind OrientDB
>>>>>>>>
>>>>>>>> On Tuesday, March 24, 2015 at 11:19:05 AM UTC-5, Amir Khawaja wrote:
>>>>>>>>>
>>>>>>>>> Greetings, everyone. Has anyone had much success running an
>>>>>>>>> OrientDB 2.0.5 cluster in Azure? I created a cluster in Windows Azure
>>>>>>>>> with
>>>>>>>>> 4 nodes using CentOS 7 and OrientDB Community 2.0.4 -- 2 nodes in US
>>>>>>>>> East2
>>>>>>>>> and 2 nodes in US West. There is a Site-to-Site VPN connection
>>>>>>>>> between the
>>>>>>>>> two regions in Azure and data is flowing between machines across the
>>>>>>>>> network. I have three databases that I have currently deployed and
>>>>>>>>> testing.
>>>>>>>>> I find that many times the synchronization between databases does not
>>>>>>>>> occur. For instance, if I startup the first node in US East2 and once
>>>>>>>>> that
>>>>>>>>> comes online, fire up the second node in US West, the US West node
>>>>>>>>> will not
>>>>>>>>> come online telling me that the database is not yet online. At that
>>>>>>>>> point,
>>>>>>>>> I kill the process and then eventually the database comes online. I
>>>>>>>>> even
>>>>>>>>> have to go so far as to delete the databases in the database path
>>>>>>>>> folder. I
>>>>>>>>> do this a few times and eventually the server may startup. Sometimes,
>>>>>>>>> I
>>>>>>>>> will have three of the four nodes working and the fourth just refuses
>>>>>>>>> to
>>>>>>>>> come online.
>>>>>>>>>
>>>>>>>>> The VM size selected for each node in the cluster is a D4 (4
>>>>>>>>> cores, 28GB RAM). This should be more than sufficient to handle most
>>>>>>>>> loads.
>>>>>>>>> Surely, I must be missing something as this is not acceptable
>>>>>>>>> production
>>>>>>>>> behavior. For reference, I am pasting the hazelcast.xml and
>>>>>>>>> default-distributed-db-config.json files here in hopes that someone
>>>>>>>>> has
>>>>>>>>> some pointers for me.
>>>>>>>>>
>>>>>>>>> *** hazelcast.xml ***
>>>>>>>>>
>>>>>>>>> <?xml version="1.0" encoding="UTF-8"?>
>>>>>>>>> <!-- ~ Copyright (c) 2008-2012, Hazel Bilisim Ltd. All Rights
>>>>>>>>> Reserved. ~
>>>>>>>>> ~ Licensed under the Apache License, Version 2.0 (the "License");
>>>>>>>>> ~ you may
>>>>>>>>> not use this file except in compliance with the License. ~ You may
>>>>>>>>> obtain
>>>>>>>>> a copy of the License at ~ ~
>>>>>>>>> http://www.apache.org/licenses/LICENSE-2.0 ~
>>>>>>>>> ~ Unless required by applicable law or agreed to in writing,
>>>>>>>>> software ~ distributed
>>>>>>>>> under the License is distributed on an "AS IS" BASIS, ~ WITHOUT
>>>>>>>>> WARRANTIES
>>>>>>>>> OR CONDITIONS OF ANY KIND, either express or implied. ~ See the
>>>>>>>>> License for
>>>>>>>>> the specific language governing permissions and ~ limitations
>>>>>>>>> under the License. -->
>>>>>>>>>
>>>>>>>>> <hazelcast
>>>>>>>>> xsi:schemaLocation="http://www.hazelcast.com/schema/config
>>>>>>>>> hazelcast-config-3.0.xsd"
>>>>>>>>> xmlns="http://www.hazelcast.com/schema/config" xmlns:xsi="
>>>>>>>>> http://www.w3.org/2001/XMLSchema-instance">
>>>>>>>>> <group>
>>>>>>>>> <name>[name]</name>
>>>>>>>>> <password>[password]</password>
>>>>>>>>> </group>
>>>>>>>>> <network>
>>>>>>>>> <port auto-increment="true">2434</port>
>>>>>>>>> <join>
>>>>>>>>> <multicast enabled="false">
>>>>>>>>> <multicast-group>235.1.1.1</multicast-group>
>>>>>>>>> <multicast-port>2434</multicast-port>
>>>>>>>>> </multicast>
>>>>>>>>> <tcp-ip enabled="true">
>>>>>>>>> <member>10.0.0.4</member>
>>>>>>>>> <member>10.0.0.5</member>
>>>>>>>>> <member>10.1.0.4</member>
>>>>>>>>> <member>10.1.0.5</member>
>>>>>>>>> </tcp-ip>
>>>>>>>>> </join>
>>>>>>>>> </network>
>>>>>>>>> <executor-service>
>>>>>>>>> <pool-size>16</pool-size>
>>>>>>>>> </executor-service>
>>>>>>>>> </hazelcast>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> *** default-distributed-db-config.json ***
>>>>>>>>>
>>>>>>>>> {
>>>>>>>>> "autoDeploy": true,
>>>>>>>>> "hotAlignment": true,
>>>>>>>>> "executionMode": "synchronous",
>>>>>>>>> "readQuorum": 1,
>>>>>>>>> "writeQuorum": 3,
>>>>>>>>> "failureAvailableNodesLessQuorum": false,
>>>>>>>>> "readYourWrites": true,
>>>>>>>>> "clusters": {
>>>>>>>>> "internal": {
>>>>>>>>> },
>>>>>>>>> "index": {
>>>>>>>>> },
>>>>>>>>> "*": {
>>>>>>>>> "servers" : [ "<NEW_NODE>" ]
>>>>>>>>> }
>>>>>>>>> }
>>>>>>>>> }
>>>>>>>>>
>>>>>>>>> Thank you for any assistance you can offer.
>>>>>>>>>
>>>>>>>>> Amir.
>>>>>>>>>
>>>>>>>>
--
---
You received this message because you are subscribed to the Google Groups
"OrientDB" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.