It would be easier to give you an answer if you provided the
CIBs as an attachment - especially in the state where you do not
understand why something happens ;-)

I am attaching the CIB with the status section stripped. It will now
be revealed that I actually have a few more resources configured,
which, in the interest of avoiding clutter, I left out of the initial
presentation of the question.


Here are a couple of things that come to mind.

* I have no clue whether colocation rules work on cloned resources as you
intended. The question is: if a clone primitive fails on one node,
what impact does it have on the score for the clone ("database"
or "webserver")?

I am *assuming* clones work with colocation, because I can do:

  % crm_resource -W -r webserver
  resource webserver is running on: ska
  resource webserver is running on: dub

and:

  % crm_resource -W -r database
  resource database is running on: ska
  resource database is running on: dub

Also, it seems that the WebIP isn't started on a server until the
database and webserver come up, which would be a result of the
colocation constraints.


* there is a nice tool called ptest from which you can get a lot of
info (it took me quite a while until I was able to work with it).

Here are some filters I use when I debug a cluster:
/usr/lib/heartbeat/ptest -L -VVVVVVVV 2>&1|grep test_expression|less
/usr/lib/heartbeat/ptest -L -VVVVVVVV 2>&1|grep native_rsc_location|less
/usr/lib/heartbeat/ptest -L -VVVVVVVV 2>&1|grep resource_node_score|less

Useful. I will apply these when a resource fails and look at the scores.


* there are in general two ways to work with colocation:
a) resources cannot run anywhere unless I tell them they may run on a certain
node
b) resources can run everywhere unless I tell them not to run there.
(I forgot the CIB attribute which manages this behaviour, but if you read the
DTD it should be explained there)

I assume this is controlled by either specifying score="INFINITY" or
score="-INFINITY" in the rsc_colocation.


---> this has an impact when your dependent resources fail over

This is what I was hoping. I hope to gain a little more insight later
today when I can run some tests.


Your help is greatly appreciated. Thank you.

Best,
Christian
 <cib admin_epoch="0" have_quorum="true" num_peers="1" cib_feature_revision="1.3" ccm_transition="11" generated="true" dc_uuid="e9f7c85b-52f4-4aa8-8227-83e167e625bd" epoch="108" num_updates="25325">
   <!-- Cluster Information Base as posted to the list, with the status
        section stripped by the poster. Only the configuration section
        is present. The dc_uuid above equals the id of node "ska" below. -->
   <configuration>
     <!-- Cluster-wide options, split across two property sets. -->
     <crm_config>
       <cluster_property_set id="default_cluster_properties" score="10">
         <attributes>
           <nvpair id="transition_idle_timeout" name="transition_idle_timeout" value="120s"/>
           <!-- NOTE(review): this set uses the underscore spelling with value
                "true", while the cib-bootstrap-options set below uses the
                hyphen spelling "symmetric-cluster" with value "False". The two
                settings contradict each other; which one the cluster honours
                is not visible from this file. TODO confirm against the CIB DTD
                for the installed Heartbeat version and keep only one. -->
           <nvpair id="symmetric_cluster" name="symmetric_cluster" value="true"/>
           <nvpair id="no_quorum_policy" name="no_quorum_policy" value="stop"/>
         </attributes>
       </cluster_property_set>
       <cluster_property_set id="cib-bootstrap-options">
         <attributes>
           <!-- NOTE(review): conflicts with symmetric_cluster="true" above.
                No rsc_location constraints appear in the constraints section,
                so if this "False" value were the effective one, resources
                would presumably have no node they are allowed on. Verify. -->
           <nvpair id="cib-bootstrap-options-symmetric-cluster" name="symmetric-cluster" value="False"/>
           <nvpair name="last-lrm-refresh" id="cib-bootstrap-options-last-lrm-refresh" value="1181896725"/>
           <nvpair id="cib-bootstrap-options-is_managed_default" name="is_managed_default" value="true"/>
         </attributes>
       </cluster_property_set>
     </crm_config>
     <!-- The two cluster members, "ska" and "dub"; neither carries any
          node attributes. -->
     <nodes>
       <node uname="ska" type="normal" id="e9f7c85b-52f4-4aa8-8227-83e167e625bd">
         <instance_attributes id="nodes-e9f7c85b-52f4-4aa8-8227-83e167e625bd">
           <attributes/>
         </instance_attributes>
       </node>
       <node uname="dub" type="normal" id="807e97ee-cba3-400f-9d78-44e7d3d1820b">
         <instance_attributes id="nodes-807e97ee-cba3-400f-9d78-44e7d3d1820b">
           <attributes/>
         </instance_attributes>
       </node>
     </nodes>
     <!-- Six clones (webserver, database, replication, mta, imap, virusscan),
          each configured with clone_max=2 and clone_node_max=1, plus three
          plain primitives (skaWebIP, dubWebIP, mailer). -->
     <resources>
       <!-- Clone "webserver": the lighttpd LSB init script, monitored every 60s. -->
       <clone id="webserver" ordered="false" interleave="false" notify="false">
         <instance_attributes id="8d1d738f-23b4-4922-9fa6-2459bbe8cbf7">
           <attributes>
             <nvpair name="clone_max" value="2" id="1dc3de93-6e38-4959-976c-762840130e96"/>
             <nvpair name="clone_node_max" value="1" id="51131b26-0f3f-410b-a9e6-405e2e3fda33"/>
           </attributes>
         </instance_attributes>
         <primitive id="lighttpd" class="lsb" type="lighttpd">
           <operations>
             <op name="stop" timeout="60s" id="dbbbf88d-6051-43e8-b24d-e2d4162111bc"/>
             <op name="start" timeout="60s" id="b5f5171d-3aab-47dc-835b-27cfce41e632"/>
             <op name="monitor" interval="60s" timeout="60s" id="03a407f9-88f3-4900-8c26-c4281af7fb5d"/>
           </operations>
         </primitive>
       </clone>
       <!-- Clone "database": the heartbeat-provided OCF mysql agent; no
            instance parameters are passed to the agent here. -->
       <clone id="database" ordered="false" interleave="false" notify="false">
         <instance_attributes id="4223c779-56ee-4042-9395-1f204bc1c291">
           <attributes>
             <nvpair name="clone_max" value="2" id="074db171-e8ee-47a3-84b0-7295547148c8"/>
             <nvpair name="clone_node_max" value="1" id="ef56f0f4-9aaa-46c9-ab08-4783811b72df"/>
           </attributes>
         </instance_attributes>
         <primitive id="MySQL" class="ocf" type="mysql" provider="heartbeat">
           <operations>
             <op id="MySQL_stop" name="stop" timeout="60s"/>
             <op id="MySQL_start" name="start" timeout="60s"/>
             <op id="MySQL_monitor" name="monitor" interval="60s" timeout="60s"/>
           </operations>
         </primitive>
       </clone>
       <!-- Two service IP addresses on eth0 with netmask 27. Neither
            primitive defines any operations, so no monitor op is
            configured for the addresses in this file. -->
       <primitive id="skaWebIP" class="ocf" type="IPaddr" provider="heartbeat">
         <instance_attributes id="3eee60e9-cb47-4927-8c93-0ccf807f1212">
           <attributes>
             <nvpair id="skaWebIP_ip" name="ip" value="88.198.56.25"/>
             <nvpair id="skaWebIP_nic" name="nic" value="eth0"/>
             <nvpair id="skaWebIP_netmask" name="netmask" value="27"/>
           </attributes>
         </instance_attributes>
       </primitive>
       <primitive id="dubWebIP" class="ocf" type="IPaddr" provider="heartbeat">
         <instance_attributes id="9c96fc07-232a-4ed2-a0b0-641233f6df51">
           <attributes>
             <nvpair id="dubWebIP_ip" name="ip" value="88.198.56.24"/>
             <nvpair id="dubWebIP_nic" name="nic" value="eth0"/>
             <nvpair id="dubWebIP_netmask" name="netmask" value="27"/>
           </attributes>
         </instance_attributes>
       </primitive>
       <!-- MailTo resource; the address value was redacted by the list
            archive. No constraint in this file references "mailer". -->
       <primitive id="mailer" class="ocf" type="MailTo" provider="heartbeat">
         <instance_attributes id="b94c103e-6aee-4abe-bb68-08c4daeeffa8">
           <attributes>
             <nvpair name="email" value="[EMAIL PROTECTED]" id="76d40d2a-3e1a-4f22-9292-c9945c510a36"/>
             <nvpair name="subject" value="Heartbeat resource takeover" id="d5122587-01e4-4386-a1db-1f6fe7cbbf5e"/>
           </attributes>
         </instance_attributes>
       </primitive>
       <!-- Clone "replication": MySQLSlave agent from provider "cscd" (not the
            stock "heartbeat" provider). Unlike the other clones it sets no
            notify attribute, and its clone-level instance_attributes come
            after the primitive rather than before it. -->
       <clone ordered="false" interleave="false" id="replication">
         <primitive class="ocf" type="MySQLSlave" id="MySQLSlave" provider="cscd">
           <operations>
             <op name="start" timeout="60s" id="11ab4eb6-fbd5-494e-884d-882ed998cb08"/>
             <op name="stop" timeout="60s" id="ce41eb01-04ea-4d19-96b6-5120f3684459"/>
             <op interval="60s" name="monitor" timeout="60s" id="23385bc3-a62f-41a0-a8aa-4a5c1731287c"/>
           </operations>
           <instance_attributes id="94a836d6-b08d-4df4-a447-d4ac2d89bd88">
             <attributes>
               <nvpair name="user" value="heartbeat" id="3fde4159-2347-4ff2-b755-614b482df999"/>
               <!-- NOTE(review): the password is an empty string; whether it
                    was blanked for posting or is really empty cannot be told
                    from this file. -->
               <nvpair name="password" value="" id="bc6f627c-2f70-4e30-898f-c58d950f3944"/>
             </attributes>
           </instance_attributes>
         </primitive>
         <instance_attributes id="14d3f015-7535-432b-a548-6985b43f4214">
           <attributes>
             <nvpair name="clone_max" value="2" id="b4272f2e-1035-4e92-ac8a-88d61e73ffe0"/>
             <nvpair name="clone_node_max" value="1" id="0586c2eb-389d-4e28-b2c8-a590cd4d6209"/>
           </attributes>
         </instance_attributes>
       </clone>
       <!-- The mail clones below (mta, imap, virusscan) share one shape:
            an LSB init script with 10s start/stop timeouts and a 10s monitor
            interval. Their monitor ops specify no timeout, unlike the
            lighttpd, MySQL and MySQLSlave monitors. None of them is
            referenced by any constraint in this file. -->
       <clone id="mta" ordered="false" interleave="false" notify="false">
         <instance_attributes id="6675d07c-2f73-4794-8060-6dbd24d8cb9c">
           <attributes>
             <nvpair name="clone_max" value="2" id="748956f5-97b9-45c2-92a8-c1d0a6422058"/>
             <nvpair name="clone_node_max" value="1" id="d3356f8a-2be0-4120-a05f-86ec60568baa"/>
           </attributes>
         </instance_attributes>
         <primitive id="postfix" class="lsb" type="postfix">
           <operations>
             <op name="stop" timeout="10s" id="4c328cf2-6981-448f-a9f9-a7668739dfa3"/>
             <op name="start" timeout="10s" id="7d925d54-d416-4fb3-8ed3-b02f6562832a"/>
             <op name="monitor" interval="10s" id="2b1f56af-f2a8-45ad-a35b-608a17129df2"/>
           </operations>
         </primitive>
       </clone>
       <clone id="imap" ordered="false" interleave="false" notify="false">
         <instance_attributes id="3516a70b-0d27-427e-9ebf-2a4fa3ab19d3">
           <attributes>
             <nvpair name="clone_max" value="2" id="9cd86349-c7bb-4e6c-a577-16455bf1ba01"/>
             <nvpair name="clone_node_max" value="1" id="0d606db6-795e-4659-ae9d-e846401a967f"/>
           </attributes>
         </instance_attributes>
         <primitive id="dovecot" class="lsb" type="dovecot">
           <operations>
             <op name="stop" timeout="10s" id="d28c1362-092a-4258-9810-4945ed59bf22"/>
             <op name="start" timeout="10s" id="2c27ed13-5091-4e6b-8865-cba3dbbf41ff"/>
             <op name="monitor" interval="10s" id="95368560-cbe5-4338-a03c-65f7f2b9dccd"/>
           </operations>
         </primitive>
       </clone>
       <clone id="virusscan" ordered="false" interleave="false" notify="false">
         <instance_attributes id="11204bb0-55a3-47f7-9361-2c5d7de4927f">
           <attributes>
             <nvpair name="clone_max" value="2" id="58a08d0b-b402-4a13-b03e-93bee82b74bc"/>
             <nvpair name="clone_node_max" value="1" id="6383b8f3-97af-46cd-9e89-01e3eab3e7c9"/>
           </attributes>
         </instance_attributes>
         <primitive id="clamav-daemon" class="lsb" type="clamav-daemon">
           <operations>
             <op name="stop" timeout="10s" id="3e1a0c59-0706-491b-b509-fb00fd555de6"/>
             <op name="start" timeout="10s" id="8d44f30b-f21f-41d8-9e30-b9659a1a0b79"/>
             <op name="monitor" interval="10s" id="3a14c7cc-4152-4534-92f1-61dd19548446"/>
           </operations>
         </primitive>
       </clone>
     </resources>
     <!-- Seven INFINITY colocation constraints and one ordering constraint.
          There are no rsc_location constraints.
          NOTE(review): in every IP-related colocation the clone is the "from"
          side and the IP primitive is the "to" side, and each clone is tied
          to BOTH skaWebIP and dubWebIP. Confirm against the CIB DTD that this
          direction expresses the intended dependency (IP address only where
          the services are running) and what it implies when the two IPs sit
          on different nodes. -->
     <constraints>
       <rsc_colocation id="skaWebIP_webserver_colocation" from="webserver" to="skaWebIP" score="INFINITY"/>
       <rsc_colocation id="dubWebIP_webserver_colocation" from="webserver" to="dubWebIP" score="INFINITY"/>
       <rsc_colocation id="skaWebIP_database_colocation" from="database" to="skaWebIP" score="INFINITY"/>
       <rsc_colocation id="dubWebIP_database_colocation" from="database" to="dubWebIP" score="INFINITY"/>
       <rsc_colocation id="dubWebIP_replication_colocation" from="replication" to="dubWebIP" score="INFINITY"/>
       <rsc_colocation id="skaWebIP_replication_colocation" from="replication" to="skaWebIP" score="INFINITY"/>
       <rsc_colocation id="database_replication_colocation" from="database" to="replication" score="INFINITY"/>
       <!-- Ordering: start of "database" is declared "before" "replication". -->
       <rsc_order id="database_replication_order" from="database" action="start" type="before" to="replication"/>
     </constraints>
   </configuration>
 </cib>

_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to