Re: [Pgcluster-general] pglb hanging

Philip Marcus Fri, 30 Oct 2009 00:05:12 -0700

Here are my pglb.conf and pgreplicate.conf running on my "server1" which
runs pglb and pgreplicate daemons.


#pglb.conf

<Cluster_Server_Info>
    <Host_Name>                 clusterdb1.domain.com  </Host_Name>
    <Port>                      5432                    </Port>
    <Max_Connect>               512                     </Max_Connect>
</Cluster_Server_Info>

<Cluster_Server_Info>
    <Host_Name>                 clusterdb2.domain.com  </Host_Name>
    <Port>                      5432                    </Port>
    <Max_Connect>               512                     </Max_Connect>
</Cluster_Server_Info>

<Cluster_Server_Info>
    <Host_Name>                 clusterdb3.domain.com  </Host_Name>
    <Port>                      5432                    </Port>
    <Max_Connect>               512                     </Max_Connect>
</Cluster_Server_Info>


<Host_Name>                     pglb1.domain.com       </Host_Name>
<Backend_Socket_Dir>            /tmp
</Backend_Socket_Dir>
<Receive_Port>                  5432                    </Receive_Port>
<Recovery_Port>                 6001                    </Recovery_Port>
<Max_Cluster_Num>               128                     </Max_Cluster_Num>
<Use_Connection_Pooling>        no
</Use_Connection_Pooling>
<LifeCheck_Timeout>             3s                      </LifeCheck_Timeout>
<LifeCheck_Interval>            15s
</LifeCheck_Interval>
<Connection_Life_Time>          0s
</Connection_Life_Time>

<Log_File_Name>                 /var/log/postgresql/pglb1.log
</Log_File_Name>
<Log_File_Size>                 10M
</Log_File_Size>
<Log_Rotate>                    10
</Log_Rotate>


# pgreplicate.conf
<Cluster_Server_Info>
    <Host_Name>                 clusterdb1.domain.com          </Host_Name>
    <Port>                      5432                            </Port>
    <Recovery_Port>             7001
</Recovery_Port>
</Cluster_Server_Info>
<Cluster_Server_Info>
    <Host_Name>                 clusterdb2.domain.com          </Host_Name>
    <Port>                      5432                            </Port>
    <Recovery_Port>             7001
</Recovery_Port>
</Cluster_Server_Info>
<Cluster_Server_Info>
    <Host_Name>                 clusterdb3.domain.com          </Host_Name>
    <Port>                      5432                            </Port>
    <Recovery_Port>             7001
</Recovery_Port>
</Cluster_Server_Info>



<LoadBalance_Server_Info>
        <Host_Name>             pglb1.domain.com               </Host_Name>
        <Recovery_Port>         6001
</Recovery_Port>
</LoadBalance_Server_Info>


# I commented out this info below since I stopped using the 2 replication
# servers in case that was the cause:
#------------------------------------------------------------
#<Replicate_Server_Info>
        #<Host_Name>            pgrepl1.domain.com             </Host_Name>
        #<Port>                 8002                            </Port>
        #<Recovery_Port>        8102                            </Recovery_Port>
#</Replicate_Server_Info>

#<Replicate_Server_Info>
        #<Host_Name>             pgrepl2.domain.com             </Host_Name>
        #<Port>                  8002                            </Port>
        #<Recovery_Port>         8102                            </Recovery_Port>
#</Replicate_Server_Info>


<Host_Name>                     pgrepl1.domain.com     </Host_Name>
<Replication_Port>              8001                    </Replication_Port>
<Recovery_Port>                 8101                    </Recovery_Port>
<RLOG_Port>                     8301                    </RLOG_Port>
<Use_Replication_Log>           no
</Use_Replication_Log>
<Replication_Timeout>           1min
</Replication_Timeout>
<LifeCheck_Timeout>             3s                      </LifeCheck_Timeout>
<LifeCheck_Interval>            15s
</LifeCheck_Interval>

<Log_File_Name>                 /var/log/postgresql/pgreplicate.log
</Log_File_Name>
<Log_File_Size>                 50M
</Log_File_Size>
<Log_Rotate>                    3
</Log_Rotate>
<Status_Log_File>               /var/log/postgresql/pgreplicate.log
</Status_Log_File>
<Error_Log_File>                /var/log/postgresql/pgreplicate.log
</Error_Log_File>


#Here's the pgreplicate.conf from the 2nd replication server (pgrepl2.domain.com)
#pgreplicate.conf

<Cluster_Server_Info>
    <Host_Name>                 clusterdb1.domain.com          </Host_Name>
    <Port>                      5432                            </Port>
    <Recovery_Port>             7001
</Recovery_Port>
</Cluster_Server_Info>
<Cluster_Server_Info>
    <Host_Name>                 clusterdb2.domain.com          </Host_Name>
    <Port>                      5432                            </Port>
    <Recovery_Port>             7001
</Recovery_Port>
</Cluster_Server_Info>
<Cluster_Server_Info>
    <Host_Name>                 clusterdb3.domain.com          </Host_Name>
    <Port>                      5432                            </Port>
    <Recovery_Port>             7001
</Recovery_Port>
</Cluster_Server_Info>


<LoadBalance_Server_Info>
        <Host_Name>             pglb1.domain.com               </Host_Name>
        <Recovery_Port>         6001
</Recovery_Port>
</LoadBalance_Server_Info>


<Replicate_Server_Info>
        <Host_Name>             pgrepl1.domain.com             </Host_Name>
        <Port>                  8002                            </Port>
        <Recovery_Port>         8102
</Recovery_Port>
</Replicate_Server_Info>

# commented out since I don't think this is needed? How do I specify upper
# cascade replicant?
#<Replicate_Server_Info>
        #<Host_Name>             pgrepl2.domain.com             </Host_Name>
        #<Port>                  8002                            </Port>
        #<Recovery_Port>         8102                            </Recovery_Port>
#</Replicate_Server_Info>


<Host_Name>                     pgrepl2.domain.com     </Host_Name>
<Replication_Port>              8001                    </Replication_Port>
<Recovery_Port>                 8101                    </Recovery_Port>
<RLOG_Port>                     8301                    </RLOG_Port>
<Use_Replication_Log>           yes
</Use_Replication_Log>
<Replication_Timeout>           1min
</Replication_Timeout>
<LifeCheck_Timeout>             3s                      </LifeCheck_Timeout>
<LifeCheck_Interval>            15s
</LifeCheck_Interval>

<Log_File_Name>                 /var/log/postgresql/pgreplicate.log
</Log_File_Name>
<Log_File_Size>                 50M
</Log_File_Size>
<Log_Rotate>                    3
</Log_Rotate>
<Status_Log_File>               /var/log/postgresql/pgreplicate.log
</Status_Log_File>
<Error_Log_File>                /var/log/postgresql/pgreplicate.log
</Error_Log_File>
#############################################################################

In regard to your second question: no, the pglb process was still running.
(This happens both when I daemonize it and when I keep it running in the
foreground with the "-v -n" option.)

All of a sudden it just stops responding to any queries.
I also cannot run a "status" on the PID. If I try to kill the PID, it
doesn't die; I actually need to kill -9 it.

Here's a link to the pglb.log file
http://commoncrawl.org/pglb1.log

The end part was when I was trying to do a -R restore which didn't work. Not
sure why though, I've set up all the SSH Keys and rsync works across all the
machines no problem, but not with trying to bring up the downed database to
a restore point.

Thanks for the help!

Philip

On Thu, Oct 29, 2009 at 11:39 PM, <[email protected]> wrote:

> Hi Philip,
>
> Would you let me know your setup files of pgreplicate.conf and pglb.conf.
>
> And one more,
>  was pglb process also downed when you found something wrong.
> If pglb process was living at that time, I need to know more detail about
> the situation.
>
> Regards,
> --
> At.Mitnai
>
>
> -- original message --
> From: Philip Marcus<[email protected]>
> To: <[email protected]>
> Sent: Thu, 29 Oct 2009 12:11:57 -0700
> Subject: [Pgcluster-general] pglb hanging
>
> >Dear PGCluster users,
> >
> >I've been playing around with PGCluster for about a week now, seeing if I
> >can get it working enough to use it in Production.
> >I've made some good progress, but met with something unusual just
> recently.
> >
> >My setup is as follows:
> >
> >4 physical machines:
> >I have 1 PGLB and 1 PGReplication server running on server1
> >I have 3 clusterdb's running on server2, server3, server4
> >
> >My main problem is this:
> >I start up all the services and things seem to be working fine.
> >I create a test DB by connecting through the PGLB and see it get
> replicated
> >to the 3 clusterDB machines.
> >I then drop this DB and that works fine. If I try to create another
> testDB,
> >the command just hangs indefinitely, until I kill it off, or kill and
> >restart the PGLB service.
> >
> >During this behaviour, I can still run any of the command directly to
> >cluster1, 2, or 3, and see it get replicated to the other clusterdb's.
> >Any command that I point to the PGLB service simply hangs.
> >
> >It sounds like it might be related to this:
> >http://pgfoundry.org/pipermail/pgcluster-general/2008-January/001798.html
> >but I'm not sure.
> >
> >I'm running the latest version:
> >pgcluster-1.9.0rc7 on all 4 servers.
> >
> >I see this in the pglb.sts file:
> >cat pglb.sts
> >Thu Oct 29 11:47:05 2009  port(5432) host:10.0.10.52 initialize
> >Thu Oct 29 11:47:05 2009  port(5432) host:10.0.10.62 initialize
> >Thu Oct 29 11:47:05 2009  port(5432) host:10.0.10.53 initialize
> >Thu Oct 29 11:47:19 2009  port(5432) host:10.0.10.52 start use
> >Thu Oct 29 11:48:24 2009  port(5432) host:10.0.10.62 start use
> >
> >
> >On another random note, when I try to run 2 PGreplicate servers, I get
> some
> >error about:
> > PGRsend_upper_cascade():upper cascade maybe down,challenge new one.
> >
> >I'm not clear on how I specify the upper cascade server versus just
> defining
> >all of the replication servers in the clusterdb.conf and pgreplicate.conf
> >Is there some undocumented Config that I'm missing?
> >
> >Thanks!
> >
> >Philip
> >
> >_______________________________________________
> >Pgcluster-general mailing list
> >[email protected]
> >http://pgfoundry.org/mailman/listinfo/pgcluster-general
> >
>
> _______________________________________________
> Pgcluster-general mailing list
> [email protected]
> http://pgfoundry.org/mailman/listinfo/pgcluster-general
>

_______________________________________________
Pgcluster-general mailing list
[email protected]
http://pgfoundry.org/mailman/listinfo/pgcluster-general

Re: [Pgcluster-general] pglb hanging

Reply via email to