SOURCES: check_replication.pl (NEW) - added
glen
glen at pld-linux.org
Wed Aug 30 00:11:52 CEST 2006
Author: glen Date: Tue Aug 29 22:11:52 2006 GMT
Module: SOURCES Tag: HEAD
---- Log message:
- added
---- Files affected:
SOURCES:
check_replication.pl (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/check_replication.pl
diff -u /dev/null SOURCES/check_replication.pl:1.1
--- /dev/null Wed Aug 30 00:11:52 2006
+++ SOURCES/check_replication.pl Wed Aug 30 00:11:47 2006
@@ -0,0 +1,318 @@
+#!/usr/bin/perl -w
+use strict;
+use Getopt::Long;
+use DBI;
+
+# Changelog:
+# 20050304 - Skip the row count on InnoDB tables, because the SHOW TABLE STATUS
+# is just an ESTIMATE that varies wildly. Broke the check for
+# this out to its own subroutine to clean up compare_status().
+# 20050303 - Correct master-port handling (I don't use this now as I get the
+# master figured out from the slave by doing 'show slave status').
+# - Added the master and slave ports to the printed output (I run
+# many slaves on a slave host, one for each master I am slaving
+# from; sometimes I have separate masters on the same host so that
+# each one can be stopped, started independently with their own
+# keycache).
+# Added an option "--check-random-database", which looks at the
+# slave and sees which databases are being replicated, randomly
+# picks one of these and does a 'show table status' to get back the
+# row_count, and if it differs by more than
+# 'table-rows-diff-absolute-crit' it adds this to the status line.
+# If your replication is good, then the number of rows between the
+# master and slave should be about the same, and the update time on
+# each table within a replicated database should be about the same.
+# The 'about' is because the time difference in checking the slave and
+# then the master, and the delay in replication in the other direction.
+# - If there are table differences in the random check, then show a warning.
+#
+# 20050217 - Fix a typo in the comments
+# - Convert the Seconds_Behind_Master to hours and seconds if it is large
+#
+# 20040120 - Make it find the master automatically, so you only specify a slave
+# - Update the output to show seconds behind for MySQL 4.1 slaves
+#
+# 20041102 - Support MySQL 4.1 Exec_master_log_pos -> Exec_Master_Log_Pos case change
+
+our $VERSION=0.03;
+
+# $Id$
+
+# Built-in defaults; every one of these can be overridden on the command line.
+my $options = {
+    'slave'                         => 'slavehost',
+    'slave-port'                    => 3306,
+    'slave-user'                    => 'repl',
+    'slave-pass'                    => 'password',
+    'crit'                          => 0.5,
+    'warn'                          => 0.1,
+    'debug'                         => 0,
+    'table-rows-diff-absolute-crit' => 10,
+    'table-rows-diff-absolute-warn' => 5,
+};
+
+# GetOptions returns false when it meets an unknown or malformed option (it
+# has already complained on STDERR by then).  Stop with the Nagios UNKNOWN
+# code instead of silently running the check against the defaults above.
+GetOptions(
+    $options,
+    "slave=s", "slave-user=s", "slave-pass=s", "slave-port=i",
+    "master=s", "master-port=i", "master-user=s", "master-pass=s",
+    "crit=s", "warn=s", "help", "debug=i", "version",
+    "check-random-database",
+    "table-rows-diff-absolute-crit=i", "table-rows-diff-absolute-warn=i",
+) or do {
+    print "UNKNOWN: invalid command line arguments (see --help)\n";
+    exit 3;
+};
+my $max_binlog;
+
+# --help prints the usage text and --version prints the script version; both
+# exit straight away (status 0) without contacting any server.
+if (defined $options->{'help'}) {
+    print <<FOO;
+$0: check replication between MySQL database instances
+
+ check_replication.pl [ --slave <host> ] [ --slave-pass <pass> ]
+ [ --slave-port <d> ] [ --slave-user <user> ] [ --master <host> ]
+ [ --master-pass <pass> ] [ --master-port <port> ] [ --master-user <user> ]
+ [ --crit <positions> ] [ --warn <positions> ] [ --check-random-database ]
+ [ --table-rows-diff-absolute-crit <number> ]
+ [ --table-rows-diff-absolute-warn <number> ]
+
+ --slave <host> - MySQL instance running as a slave server
+ --slave-port <d> - port for the slave
+ --slave-user <user> - Username with File/Process/Super privs
+ --slave-pass <pass> - Password for above user
+ --master <host> - MySQL instance running as server (override)
+ --master-port <d> - port for the master (override)
+ --master-user <user> - Username for master (override)
+ --master-pass <pass> - Password for master
+ --crit <positions> - Number of complete master binlogs for critical state
+ --warn <positions> - Number of complete master binlogs for warning state
+ --check-random-database - Select a random DB from the slave's list of
+ databases and compare to the master's
+ information for these (need SELECT priv)
+ --table-rows-diff-absolute-crit <number> - If we do the check-random-database,
+ then ensure that the change in row count between master
+ and slave is below this threshold, and go critical if not
+ --table-rows-diff-absolute-warn <number> - If we do the check-random-database,
+ then ensure that the change in row count between master
+ and slave is below this threshold, and go warning if not
+ --help - This help page
+
+
+By default, you should use your configured replication user, as you will
+then only need to specify the user and password once, and this script will
+find the master from the slave's running configuration.
+
+Critical and warning values are now measured as amount of a complete master
+sized binlog. If your master has the default 1GB binlog size, then specifying
+a warning value of 0.1 means that you will let the slave get 100MB out of
+sync before warning; you may want to set warning to 0.01, and critical at 0.1.
+
+MySQL 3: GRANT File, Process on *.* TO repl\@192.168.0.% IDENTIFIED BY <pass>
+MySQL 4: GRANT Super, Replication_client on *.* TO repl\@192.168.0.% IDE...
+
+If you want to use the check-random-database option, then the user needs
+SELECT privileges on all replicated tables on the master and the slave.
+
+Note: Any mysqldump tables (for backups) may lock large tables for a long
+time. If you dump from your slave for this, then your master will gallop
+away from your slave, and the difference will become large. The trick is to
+set crit above this difference and warn below.
+
+(c) 2005 Fotango. James Bromberger <jbromberger\@fotango.com>.
+FOO
+    exit;
+} elsif (defined $options->{version}) {
+    printf "%s %s\n", $0, $VERSION;
+    exit;
+}
+
+
+# Print a diagnostic line when the configured verbosity is high enough.
+#
+#   $level   - the message is shown only if --debug is at least this value
+#   $message - text to print
+#
+# Each line is prefixed with the fully qualified name of the calling sub.
+sub debug {
+    my ($threshold, $text) = @_;
+    return if $threshold > $options->{debug};
+    my $origin = (caller(1))[3];
+    print "${origin}:${text}\n";
+}
+
+# Print a Nagios UNKNOWN status line and exit with the matching code (3).
+sub _exit_unknown {
+    my ($message) = @_;
+    print "UNKNOWN: $message\n";
+    exit 3;
+}
+
+# Prepare and execute $sql on $dbh and return the live statement handle.
+# Exits UNKNOWN if either step fails.
+sub _run_query {
+    my ($dbh, $sql) = @_;
+    my $sth = $dbh->prepare($sql)
+        or _exit_unknown("cannot prepare $sql");
+    $sth->execute
+        or _exit_unknown("cannot execute $sql");
+    return $sth;
+}
+
+# Return the server's SHOW VARIABLES output as a hash ref (name => value).
+sub _fetch_variables {
+    my ($dbh) = @_;
+    my %value_of;
+    my $sth = _run_query($dbh, "show variables");
+    while (my $row = $sth->fetchrow_hashref) {
+        $value_of{ $row->{'Variable_name'} } = $row->{'Value'};
+    }
+    $sth->finish;
+    return \%value_of;
+}
+
+# Return the first row of a status statement as a hash ref, or undef when
+# the server returns no row at all.
+sub _fetch_status_row {
+    my ($dbh, $sql) = @_;
+    my $sth = _run_query($dbh, $sql);
+    my $row = $sth->fetchrow_hashref;
+    $sth->finish;
+    return $row;
+}
+
+# Return SHOW TABLE STATUS for $database as a hash ref keyed by table name.
+sub _fetch_table_status {
+    my ($dbh, $database) = @_;
+    # Quote the name so that databases with unusual characters still work.
+    $dbh->do("use " . $dbh->quote_identifier($database))
+        or _exit_unknown("cannot use database $database");
+    my %status_of;
+    my $sth = _run_query($dbh, 'show table status');
+    while (my $row = $sth->fetchrow_hashref) {
+        $status_of{ $row->{Name} } = $row;
+    }
+    $sth->finish;
+    return \%status_of;
+}
+
+# Collect replication data from the slave and its master, then hand both to
+# compare_status(), which prints the result and exits.
+#
+# Arguments:
+#   $host - slave host name
+#   $port - slave port
+#
+# The master's host, port and user are taken from the --master* options when
+# given, otherwise from the slave's own SHOW SLAVE STATUS row; the master
+# password falls back to --slave-pass.  With --check-random-database, one
+# database from the slave's Replicate_Do_DB list is picked at random and its
+# SHOW TABLE STATUS is collected on both servers.
+#
+# Any connection or query failure prints an UNKNOWN line and exits 3.
+sub get_status {
+    my ($host, $port) = @_;
+
+    debug(1, "Connecting to slave $host:$port as user " . $options->{'slave-user'});
+    my $dbh = DBI->connect("DBI:mysql:host=$host:port=$port", $options->{'slave-user'}, $options->{'slave-pass'})
+        or _exit_unknown("cannot connect to $host");
+
+    debug(2, "Getting slave variables");
+    my $slave_data = _fetch_variables($dbh);
+    debug(2, "Getting slave replication status");
+    $slave_data->{replication} = _fetch_status_row($dbh, "show slave status");
+    if (not defined $slave_data->{replication}) {
+        # Without a slave status row there is no master to find or compare.
+        $dbh->disconnect;
+        _exit_unknown("$host:$port returned no slave status (not a replication slave?)");
+    }
+
+    if (defined $options->{'check-random-database'}) {
+        debug(2, "Checking status of a random replicate-do-db");
+        my $do_db = $slave_data->{replication}->{Replicate_Do_DB};
+        my @replicated = defined $do_db ? split(',', $do_db) : ();
+        my $random_db = @replicated ? $replicated[int(rand(scalar @replicated))] : undef;
+
+        if (defined $random_db) {
+            debug(3, "DBs being replicated are: " . join(', ', @replicated) . "; random choice is $random_db");
+            $slave_data->{replication}->{table_status}->{$random_db} = _fetch_table_status($dbh, $random_db);
+        }
+    }
+    $dbh->disconnect;
+
+    # Now connect to the master...
+    $host = $options->{'master'} || $slave_data->{replication}->{Master_Host};
+    $port = $options->{'master-port'} || $slave_data->{replication}->{Master_Port};
+    my $user = $options->{'master-user'} || $slave_data->{replication}->{Master_User};
+    my $pass = $options->{'master-pass'} || $options->{'slave-pass'};
+    debug(1, "Connecting to master $host:$port as user $user");
+    $dbh = DBI->connect("DBI:mysql:host=$host:port=$port", $user, $pass)
+        or _exit_unknown("Cannot connect to master $host:$port");
+
+    debug(1, "Getting master variables");
+    my $master_data = _fetch_variables($dbh);
+    debug(2, "Getting master replication status");
+    $master_data->{replication} = _fetch_status_row($dbh, "show master status");
+    if (not defined $master_data->{replication}) {
+        $dbh->disconnect;
+        _exit_unknown("$host:$port returned no master status");
+    }
+
+    if (defined $options->{'check-random-database'}) {
+        # Look at exactly the databases that were sampled on the slave.
+        foreach my $database (keys %{ $slave_data->{replication}->{table_status} || {} }) {
+            debug(3, "The master should check $database");
+            $master_data->{replication}->{table_status}->{$database} = _fetch_table_status($dbh, $database);
+        }
+    }
+    $dbh->disconnect;
+
+    compare_status($master_data, $slave_data);
+}
+
+# Compare per-table row counts between master and slave.
+#
+# Named arguments:
+#   master - hash ref { database => { table => SHOW TABLE STATUS row } }
+#   slave  - the same structure, collected on the slave
+# Both must be defined, otherwise nothing is returned.
+#
+# InnoDB tables are skipped, because their row count in SHOW TABLE STATUS is
+# only an approximation.  For every other table present on the slave, the
+# absolute row-count difference is checked against the
+# 'table-rows-diff-absolute-crit' and 'table-rows-diff-absolute-warn' options.
+#
+# Returns a two element list: the highest level reached (0 = ok, 1 = warning,
+# 2 = critical) and a comma separated "database.table difference" string
+# naming the offending tables (empty when there are none).
+sub compare_table_rows {
+    my %args = @_;
+    return unless defined $args{master};
+    return unless defined $args{slave};
+
+    my @messages;
+    my $exit_level = 0;
+    # Sorted so that the status line is stable from one run to the next.
+    foreach my $database (sort keys %{ $args{slave} }) {
+        # Plain lookups with a fallback: a database or table that is missing
+        # on the master must not be autovivified into the caller's hash.
+        my $master_tables = $args{master}->{$database} || {};
+        foreach my $table (sort keys %{ $args{slave}->{$database} }) {
+            debug(4, "Checking $database.$table");
+            my $on_slave  = $args{slave}->{$database}->{$table};
+            my $on_master = $master_tables->{$table} || {};
+
+            # The storage engine column is called Engine or Type depending on
+            # the server version, so look at both.
+            my $is_innodb = grep { defined $_ && $_ eq 'InnoDB' } @{$on_slave}{qw(Engine Type)};
+            if ($is_innodb) {
+                # We can't rely on InnoDB's row count from 'SHOW TABLE STATUS' since it is an approximation
+                # For MySQL 4.1.10 and below, we cant even get an Update_time, since this is NULL!
+                # The only thing we /could/ do is reconnect and do a SELECT COUNT(*) FROM TABLE, but
+                # we can't be bothered! Humph.
+                debug(5, "Skipping check on InnoDB table $database.$table");
+                next;
+            }
+
+            my ($slave_rows, $master_rows) = ($on_slave->{Rows}, $on_master->{Rows});
+            if (!defined $slave_rows || !defined $master_rows) {
+                debug(2, "$database.$table has no row count on slave!") unless defined $slave_rows;
+                debug(2, "$database.$table has no row count on master!") unless defined $master_rows;
+                next;
+            }
+
+            my $row_diff = abs($slave_rows - $master_rows);
+            if ($row_diff > $options->{'table-rows-diff-absolute-crit'}) {
+                push @messages, "$database.$table $row_diff";
+                $exit_level = 2;
+            } elsif ($row_diff > $options->{'table-rows-diff-absolute-warn'}) {
+                push @messages, "$database.$table $row_diff";
+                $exit_level = 1 unless $exit_level == 2;
+            }
+        }
+    }
+    return ($exit_level, join(', ', @messages));
+}
+
+# Turn the collected master and slave data into a Nagios result.
+#
+# Arguments:
+#   $master - hash ref of the master's SHOW VARIABLES values, plus a
+#             'replication' key holding its SHOW MASTER STATUS row
+#   $slave  - the same for the slave, with 'replication' holding its
+#             SHOW SLAVE STATUS row
+# Either 'replication' hash may also carry a 'table_status' entry, which is
+# passed on to compare_table_rows().
+#
+# The replication lag is expressed in master binlogs: the difference in
+# binlog file numbers plus the position difference divided by
+# max_binlog_size.  It is compared against the --crit and --warn options.
+#
+# Never returns: prints one status line and exits 0 (OK), 1 (WARN),
+# 2 (CRITICAL) or 3 (UNKNOWN).
+sub compare_status {
+    my ($master, $slave) = @_;
+
+    # Step one; both replication threads must be running (critical if not).
+    # A missing column counts as "not running".
+    foreach my $thread (qw(IO SQL)) {
+        my $running = $slave->{replication}->{"Slave_${thread}_Running"};
+        if (lc(defined $running ? $running : '') ne 'yes') {
+            print "CRITICAL: Slave $thread thread not running\n";
+            exit 2;
+        }
+    }
+
+    # Step two; compare the positions between the master and slave
+
+    # Pattern match the BINLOG number...
+    my $master_file = $master->{replication}->{'File'};
+    my $slave_file  = $slave->{replication}->{'Relay_Master_Log_File'};
+    $master->{replication}->{'File_No'} = $1 if defined $master_file && $master_file =~ /(\d+)$/;
+    $slave->{replication}->{'File_No'} = $1 if defined $slave_file && $slave_file =~ /(\d+)$/;
+
+    # Get the slave position it is executing, being careful of the
+    # key name change in MySQL 4.1 (case change).  The last spelling is the
+    # one this script used historically and is kept for compatibility.
+    ($slave->{replication}->{'Position'}) =
+        grep { defined $_ }
+        map { $slave->{replication}->{$_} }
+        qw(Exec_Master_Log_Pos Exec_master_log_pos Exec_Master_log_pos);
+
+    # Positions are offsets into the master's binlog, so scale by the
+    # master's binlog size; the slave's setting is only a fallback.
+    my $binlog_size = $master->{max_binlog_size} || $slave->{max_binlog_size};
+
+    # Without all coordinates the arithmetic below is meaningless (and a
+    # zero binlog size would divide by zero), so report UNKNOWN instead.
+    unless (defined $master->{replication}->{'File_No'}
+        && defined $master->{replication}->{'Position'}
+        && defined $slave->{replication}->{'File_No'}
+        && defined $slave->{replication}->{'Position'}
+        && $binlog_size) {
+        print "UNKNOWN: incomplete replication coordinates from master or slave\n";
+        exit 3;
+    }
+
+    debug(3, " Master: " . $master_file . ":" . $master->{replication}->{'Position'});
+    debug(3, " Slave: " . $slave_file . ":" . $slave->{replication}->{'Position'});
+
+    my $diff = $master->{replication}->{'File_No'} - $slave->{replication}->{'File_No'}
+        + (($master->{replication}->{'Position'} - $slave->{replication}->{'Position'}) / $binlog_size);
+
+    debug(1, "diff: $diff ");
+
+    # Compare the table status if we have them
+    my ($exit_level, $table_diff_message) = compare_table_rows(master => $master->{replication}->{table_status}, slave => $slave->{replication}->{table_status});
+    # compare_table_rows() returns nothing when there was nothing to compare.
+    $exit_level = 0 unless defined $exit_level;
+
+    my $behind = $slave->{'replication'}->{Seconds_Behind_Master};
+    my $time_diff = "";
+    if (defined $behind) {
+        if ($behind > 3600) {
+            $time_diff = int($behind / 3600) . "h " . ($behind % 3600) . " secs";
+        } else {
+            $time_diff = $behind . " secs";
+        }
+    }
+
+    my $state = sprintf "%.3f diff", $diff;
+    $state.= ", $time_diff" if defined $behind;
+    $state.= ", " . ($options->{'master'} || $slave->{replication}->{Master_Host}) . ":" . ($options->{'master-port'} || $slave->{replication}->{Master_Port}) . " (" . $master->{version} . ") -> " . $options->{slave} . ":" . $options->{'slave-port'} . " (" . $slave->{version} . ")";
+    $state.= " " . $table_diff_message if $table_diff_message;
+    $state.= "\n";
+
+    # A row-count difference above the critical threshold is critical in its
+    # own right; any smaller reported difference is a warning.
+    if ($diff >= $options->{'crit'} || $exit_level == 2) {
+        print "CRITICAL: $state";
+        exit 2;
+    } elsif ($diff >= $options->{'warn'} || $table_diff_message) {
+        print "WARN: $state";
+        exit 1;
+    }
+    print "OK: $state";
+    exit 0;
+}
+
+get_status($options->{'slave'}, $options->{'slave-port'});
================================================================
More information about the pld-cvs-commit
mailing list