Hi, how many nodes do you have? I saw 4 nodes in the JSON file. Are there really 4, or were you just experimenting with tests?
Lvc@ On 24 March 2015 at 22:21, Amir Khawaja <[email protected]> wrote: > Continuing with this thread. I ended up just deleting the database and > recreating it and the problem went away. Not sure why it went away. > Nevertheless, I am now using the following > default-distributed-db-config.json: > > { > "replication": true, > "autoDeploy": true, > "hotAlignment": false, > "resyncEvery": 15, > "clusters": { > "internal": { > "replication": false > }, > "index": { > "replication": false > }, > "*": { > "replication": true, > "readQuorum": 1, > "writeQuorum": 1, > "failureAvailableNodesLessQuorum": false, > "readYourWrites": true, > "partitioning": { > "strategy": "round-robin", > "default": 0, > "partitions": [ > [ "<NEW_NODE>" ] > ] > } > } > } > } > > However, I noticed that now the following warning appears in the logs on > each cluster node: > > WARNING readQuorum setting not found for cluster=[class name]_[node name] > in distributed-config.json > > Why would this warning appear? Is it something that will eventually > compromise data integrity? Does anyone have any ideas about this? Thanks. > > Amir. 
> > > On Tuesday, March 24, 2015 at 1:13:31 PM UTC-5, Amir Khawaja wrote: >> >> Please find the contents of the distributed-config.json file below: >> >> {"@type":"d","@version":0,"version":58,"replication": >> true,"autoDeploy":true,"hotAlignment":false," >> resyncEvery":15,"clusters":{"@type":"d","@version":0," >> internal":{"@type":"d","@version":0,"replication": >> false},"index":{"@type":"d","@version":0,"replication": >> false},"*":{"@type":"d","@version":0,"replication":true, >> "readQuorum":1,"writeQuorum":1,"failureAvailableNodesLessQuoru >> m":false,"readYourWrites":true,"servers":["odb01ue2"," >> odb02ue2","odb01uw","odb02uw","<NEW_NODE>"]},"triggered_ >> odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> visitor_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> visitortrait_odb02ue2":{"@type":"d","@version":0,"servers":["odb02ue2"," >> odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"v_odb02ue2":{" >> @type":"d","@version":0,"servers":["odb02ue2"," >> odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"event_ >> odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> has_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> eventtrait_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> orole_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> license_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]},"_ >> studio_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> customer_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> 
orids_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> oschedule_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> foreignidentifier_odb02ue2":{"@type":"d","@version":0," >> servers":["odb02ue2","odb01ue2","odb01uw","odb02uw", >> "<NEW_NODE>"]},"e_odb02ue2":{"@type":"d","@version":0," >> servers":["odb02ue2","odb01ue2","odb01uw","odb02uw", >> "<NEW_NODE>"]},"ouser_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> ofunction_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> belongsto_odb02ue2":{"@type":"d","@version":0,"servers":[" >> odb02ue2","odb01ue2","odb01uw","odb02uw","<NEW_NODE>"]}," >> belongsto_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"_ >> studio_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> orids_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> eventtrait_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"v_ >> odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> visitor_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> ouser_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> triggered_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> oschedule_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]},"e_ >> odb01uw":{"@type":"d","@version":0,"servers":[" >> 
odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> ofunction_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> orole_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> visitortrait_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> license_odb01uw":{"@type":"d","@version":0,"servers":[" >> odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_NODE>"]}," >> foreignidentifier_odb01uw":{"@type":"d","@version":0," >> servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_ >> NODE>"]},"event_odb01uw":{"@type":"d","@version":0," >> servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_ >> NODE>"]},"customer_odb01uw":{"@type":"d","@version":0," >> servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_ >> NODE>"]},"has_odb01uw":{"@type":"d","@version":0," >> servers":["odb01uw","odb01ue2","odb02ue2","odb02uw","<NEW_ >> NODE>"]},"_studio_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"customer_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"orids_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"visitortrait_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> license_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> event_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> triggered_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]},"v_ >> odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> 
orole_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> foreignidentifier_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"has_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"eventtrait_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"visitor_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"ofunction_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"belongsto_odb02uw":{"@type":"d","@version":0," >> servers":["odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_ >> NODE>"]},"e_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> oschedule_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}," >> ouser_odb02uw":{"@type":"d","@version":0,"servers":[" >> odb02uw","odb01ue2","odb02ue2","odb01uw","<NEW_NODE>"]}}} >> >> >> Amir. >> >> >> >> On Tuesday, March 24, 2015 at 1:02:46 PM UTC-5, Colin wrote: >>> >>> For some reason it's trying to reach a quorum of 4. >>> >>> Could you paste your database's distributed-config.json file please? >>> >>> -Colin >>> >>> On Tuesday, March 24, 2015 at 12:40:15 PM UTC-5, Amir Khawaja wrote: >>>> >>>> The cluster is now online in US East2 and US West. 
I did the following: >>>> >>>> - Changed the default-distributed-db-config.json to: >>>> >>>> { >>>> "replication": true, >>>> "autoDeploy": true, >>>> "hotAlignment": false, >>>> "resyncEvery": 15, >>>> "clusters": { >>>> "internal": { >>>> "replication": false >>>> }, >>>> "index": { >>>> "replication": false >>>> }, >>>> "*": { >>>> "replication": true, >>>> "readQuorum": 1, >>>> "writeQuorum": 1, >>>> "failureAvailableNodesLessQuorum": false, >>>> "readYourWrites": true, >>>> "partitioning": { >>>> "strategy": "round-robin", >>>> "default": 0, >>>> "partitions": [ >>>> [ "<NEW_NODE>" ] >>>> ] >>>> } >>>> } >>>> } >>>> } >>>> >>>> - Deleted the distributed-config.json file from each database folder >>>> and restarted each node in the cluster. >>>> >>>> Now, when I connect to one of the nodes and try to delete a vertex, I >>>> receive the following error: >>>> >>>> com.orientechnologies.orient.server.distributed.ODistributedException: >>>> Error on executing distributed request (id=141 >>>> from=odb02uw task=command_sql(delete vertex #42:2) userName=) against >>>> database 'vis.[]' to nodes [odb02ue2, odb02uw, >>>> odb01uw, odb01ue2] --> >>>> com.orientechnologies.orient.server.distributed.ODistributedException: >>>> Quorum 4 not reached for >>>> request (id=141 from=odb02uw task=command_sql(delete vertex #42:2) >>>> userName=). Timeout=407ms Servers in timeout/ >>>> conflict are: - odb02ue2: >>>> com.orientechnologies.orient.core.exception.OCommandExecutionException: >>>> Error on execution >>>> of command: sql.delete vertex #42:2 - odb01ue2: >>>> com.orientechnologies.orient.core.exception. >>>> OCommandExecutionException: Error on execution of command: sql.delete >>>> vertex #42:2 - odb01uw: com.orientechnologies. 
>>>> orient.core.exception.OCommandExecutionException: Error on execution >>>> of command: sql.delete vertex #42:2 Received: >>>> {odb02uw=com.orientechnologies.orient.core.exception.OCommandExecutionException: >>>> Error on execution of command: sql. >>>> delete vertex #42:2, odb01uw=com.orientechnologies. >>>> orient.core.exception.OCommandExecutionException: Error on >>>> execution of command: sql.delete vertex #42:2, odb02ue2=com. >>>> orientechnologies.orient.core.exception. >>>> OCommandExecutionException: Error on execution of command: sql.delete >>>> vertex #42:2, odb01ue2=com.orientechnologies. >>>> orient.core.exception.OCommandExecutionException: Error on execution >>>> of command: sql.delete vertex #42:2} >>>> >>>> Why am I not able to delete a vertex? >>>> >>>> Amir. >>>> >>>> >>>> On Tuesday, March 24, 2015 at 12:20:37 PM UTC-5, Colin wrote: >>>>> >>>>> That latency should be fine so long as it's consistent. >>>>> >>>>> -Colin >>>>> >>>>> On Tuesday, March 24, 2015 at 11:52:58 AM UTC-5, Amir Khawaja wrote: >>>>>> >>>>>> Hi Colin, >>>>>> >>>>>> I checked the latency prior to posting and between regions it is >>>>>> about 65ms on average. What should I set the latency to for Hazelcast? >>>>>> >>>>>> Amir. >>>>>> >>>>>> On Tuesday, March 24, 2015 at 11:49:25 AM UTC-5, Colin wrote: >>>>>>> >>>>>>> Hi Amir, >>>>>>> >>>>>>> You might also do a ping and a traceroute between the machines and >>>>>>> see what kind of latency you're getting, just in case it's a timeout >>>>>>> issue >>>>>>> with Hazelcast. >>>>>>> >>>>>>> -Colin >>>>>>> >>>>>>> On Tuesday, March 24, 2015 at 11:32:21 AM UTC-5, Amir Khawaja wrote: >>>>>>>> >>>>>>>> Hi Colin, >>>>>>>> >>>>>>>> Thank you for the prompt response. >>>>>>>> >>>>>>>> I'm a little confused as you say "the US West node will not come >>>>>>>>> online telling me that the database is not yet online. At that >>>>>>>>> point, I >>>>>>>>> kill the process and then eventually the database comes online." 
>>>>>>>> >>>>>>>> Do you mean you kill the database process and then restart it and >>>>>>>>> then it starts communicating? >>>>>>>> >>>>>>>> >>>>>>>> Yes. I kill the database process on the cluster node where the >>>>>>>> OrientDB is not coming online. >>>>>>>> >>>>>>>> Can you see on each machine when Hazelcast 'sees' all the members? >>>>>>>>> Are all the members showing up? >>>>>>>> >>>>>>>> >>>>>>>> Yes. I see the databases are talking to each other as the IP >>>>>>>> address of the nodes show up in the log of each database server. >>>>>>>> >>>>>>>> I will try setting hotAlignment to false and report my results on >>>>>>>> this thread. >>>>>>>> >>>>>>>> Amir. >>>>>>>> >>>>>>>> >>>>>>>> On Tuesday, March 24, 2015 at 11:25:16 AM UTC-5, Colin wrote: >>>>>>>>> >>>>>>>>> Hi Amir, >>>>>>>>> >>>>>>>>> Is it consistently a problem between the same machines not seeing >>>>>>>>> each other? >>>>>>>>> >>>>>>>>> I'm a little confused as you say "the US West node will not come >>>>>>>>> online telling me that the database is not yet online. At that >>>>>>>>> point, I >>>>>>>>> kill the process and then eventually the database comes online." >>>>>>>>> >>>>>>>>> Do you mean you kill the database process and then restart it and >>>>>>>>> then it starts communicating? >>>>>>>>> >>>>>>>>> In your distributed json file, try setting "hotAlignment" to false. >>>>>>>>> >>>>>>>>> Can you see on each machine when Hazelcast 'sees' all the >>>>>>>>> members? Are all the members showing up? >>>>>>>>> >>>>>>>>> -Colin >>>>>>>>> >>>>>>>>> Orient Technologies >>>>>>>>> >>>>>>>>> The Company behind OrientDB >>>>>>>>> >>>>>>>>> On Tuesday, March 24, 2015 at 11:19:05 AM UTC-5, Amir Khawaja >>>>>>>>> wrote: >>>>>>>>>> >>>>>>>>>> Greetings, everyone. Has anyone had much success running an >>>>>>>>>> OrientDB 2.0.5 cluster in Azure? 
I created a cluster in Windows >>>>>>>>>> Azure with >>>>>>>>>> 4 nodes using CentOS 7 and OrientDB Community 2.0.4 -- 2 nodes in US >>>>>>>>>> East2 >>>>>>>>>> and 2 nodes in US West. There is a Site-to-Site VPN connection >>>>>>>>>> between the >>>>>>>>>> two regions in Azure and data is flowing between machines across the >>>>>>>>>> network. I have three databases that I have currently deployed and >>>>>>>>>> testing. >>>>>>>>>> I find that many times the synchronization between databases does not >>>>>>>>>> occur. For instance, if I startup the first node in US East2 and >>>>>>>>>> once that >>>>>>>>>> comes online, fire up the second node in US West, the US West node >>>>>>>>>> will not >>>>>>>>>> come online telling me that the database is not yet online. At that >>>>>>>>>> point, >>>>>>>>>> I kill the process and then eventually the database comes online. I >>>>>>>>>> even >>>>>>>>>> have to go so far as to delete the databases in the database path >>>>>>>>>> folder. I >>>>>>>>>> do this a few times and eventually the server may startup. >>>>>>>>>> Sometimes, I >>>>>>>>>> will have three of the four nodes working and the fourth just >>>>>>>>>> refuses to >>>>>>>>>> come online. >>>>>>>>>> >>>>>>>>>> The VM size selected for each node in the cluster is a D4 (4 >>>>>>>>>> cores, 28GB RAM). This should be more than sufficient to handle most >>>>>>>>>> loads. >>>>>>>>>> Surely, I must be missing something as this is not acceptable >>>>>>>>>> production >>>>>>>>>> behavior. For reference, I am pasting the hazelcast.xml and >>>>>>>>>> default-distributed-db-config.json files here in hopes that >>>>>>>>>> someone has some pointers for me. >>>>>>>>>> >>>>>>>>>> *** hazelcast.xml *** >>>>>>>>>> >>>>>>>>>> <?xml version="1.0" encoding="UTF-8"?> >>>>>>>>>> <!-- ~ Copyright (c) 2008-2012, Hazel Bilisim Ltd. All Rights >>>>>>>>>> Reserved. 
~ >>>>>>>>>> ~ Licensed under the Apache License, Version 2.0 (the "License"); >>>>>>>>>> ~ you may >>>>>>>>>> not use this file except in compliance with the License. ~ You >>>>>>>>>> may obtain >>>>>>>>>> a copy of the License at ~ ~ http://www.apache.org/ >>>>>>>>>> licenses/LICENSE-2.0 ~ >>>>>>>>>> ~ Unless required by applicable law or agreed to in writing, >>>>>>>>>> software ~ distributed >>>>>>>>>> under the License is distributed on an "AS IS" BASIS, ~ WITHOUT >>>>>>>>>> WARRANTIES >>>>>>>>>> OR CONDITIONS OF ANY KIND, either express or implied. ~ See the >>>>>>>>>> License for >>>>>>>>>> the specific language governing permissions and ~ limitations >>>>>>>>>> under the License. --> >>>>>>>>>> >>>>>>>>>> <hazelcast >>>>>>>>>> xsi:schemaLocation="http://www.hazelcast.com/schema/config >>>>>>>>>> hazelcast-config-3.0.xsd" >>>>>>>>>> xmlns="http://www.hazelcast.com/schema/config" xmlns:xsi=" >>>>>>>>>> http://www.w3.org/2001/XMLSchema-instance"> >>>>>>>>>> <group> >>>>>>>>>> <name>[name]</name> >>>>>>>>>> <password>[password]</password> >>>>>>>>>> </group> >>>>>>>>>> <network> >>>>>>>>>> <port auto-increment="true">2434</port> >>>>>>>>>> <join> >>>>>>>>>> <multicast enabled="false"> >>>>>>>>>> <multicast-group>235.1.1.1</multicast-group> >>>>>>>>>> <multicast-port>2434</multicast-port> >>>>>>>>>> </multicast> >>>>>>>>>> <tcp-ip enabled="true"> >>>>>>>>>> <member>10.0.0.4</member> >>>>>>>>>> <member>10.0.0.5</member> >>>>>>>>>> <member>10.1.0.4</member> >>>>>>>>>> <member>10.1.0.5</member> >>>>>>>>>> </tcp-ip> >>>>>>>>>> </join> >>>>>>>>>> </network> >>>>>>>>>> <executor-service> >>>>>>>>>> <pool-size>16</pool-size> >>>>>>>>>> </executor-service> >>>>>>>>>> </hazelcast> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> *** default-distributed-db-config.json *** >>>>>>>>>> >>>>>>>>>> { >>>>>>>>>> "autoDeploy": true, >>>>>>>>>> "hotAlignment": true, >>>>>>>>>> "executionMode": "synchronous", >>>>>>>>>> "readQuorum": 1, >>>>>>>>>> "writeQuorum": 3, >>>>>>>>>> 
"failureAvailableNodesLessQuorum": false, >>>>>>>>>> "readYourWrites": true, >>>>>>>>>> "clusters": { >>>>>>>>>> "internal": { >>>>>>>>>> }, >>>>>>>>>> "index": { >>>>>>>>>> }, >>>>>>>>>> "*": { >>>>>>>>>> "servers" : [ "<NEW_NODE>" ] >>>>>>>>>> } >>>>>>>>>> } >>>>>>>>>> } >>>>>>>>>> >>>>>>>>>> Thank you for any assistance you can offer. >>>>>>>>>> >>>>>>>>>> Amir. >>>>>>>>>> >>>>>>>>> -- > > --- > You received this message because you are subscribed to the Google Groups > "OrientDB" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to [email protected]. > For more options, visit https://groups.google.com/d/optout. > -- --- You received this message because you are subscribed to the Google Groups "OrientDB" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. For more options, visit https://groups.google.com/d/optout.
