MySQL MHA安装

准备环境

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 机器
Role | IP | host | server_id | type
------------ | -------------- | ------- | --------- | ----------
Monitor Host | 222.26.224.235 | manager | - | 监控复制组
Master | 222.26.224.238 | Master | 101 | 写入
Candicate Master | 222.26.224.237 | Slave1 | 102 |
Slave | 222.26.224.236 | Slave2 | 103 |
----------------------------------------------------------------
# 所有机器上安装 Manager Master Slave1 Slave2
shell> wget https://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
shell> rpm -ivh epel-release-6-8.noarch.rpm
shell> rm -rf epel-release-6-8.noarch.rpm
shell> yum install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-CPAN perl-Time-HiRes -y

安装MHA NODE

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# 所有机器上安装 mha4mysql-node
shell> cd /usr/local/src
shell> tar zxvf mha4mysql-node-0.56.tar.gz
shell> cd mha4mysql-node-0.56
shell> perl Makefile.PL
shell> make
shell> make install
# 查看所有安装脚本
shell> cd /usr/local/bin/
[root@zookeeper03 bin]# ll
总用量 44
-r-xr-xr-x. 1 root root 16367 713 13:26 apply_diff_relay_logs
-r-xr-xr-x. 1 root root 4807 713 13:26 filter_mysqlbinlog
-r-xr-xr-x. 1 root root 8261 713 13:26 purge_relay_logs
-r-xr-xr-x. 1 root root 7525 713 13:26 save_binary_logs

安装MHA MANAGER

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 在MHA manager机器上安装 mha manager
shell> tar zxvf mha4mysql-manager-0.56.tar.gz
shell> cd mha4mysql-manager-0.56
shell> perl Makefile.PL
shell> make
shell> make install
# 查看安装脚本
shell> ls /usr/local/bin
# 复制脚本
shell> cd /usr/local/src/mha4mysql-manager-0.56/samples/scripts
shell> cp * /usr/local/bin/

配置SSH

1
2
3
4
5
6
7
# 在所有节点配置SSH,所有节点运行下面命令
shell> ssh-keygen -t rsa
shell> ssh-copy-id -i ~/.ssh/id_rsa.pub root@222.26.224.238
shell> ssh-copy-id -i ~/.ssh/id_rsa.pub root@222.26.224.237
shell> ssh-copy-id -i ~/.ssh/id_rsa.pub root@222.26.224.236
shell> ssh-copy-id -i ~/.ssh/id_rsa.pub root@222.26.224.235

所有机器舞密码登陆

配置Master-Slave架构

1
2
3
4
5
6
7
# 参考对应文档安装Master-Slave Replication
# 配置slave read-only
slave1> set global read_only=1;
slave1> show variables like '%read_only%';
slave2> set global read_only=1;
slave2> show variables like '%read_only%';

在Master添加监控用户

1
2
master> grant all privileges on *.* to 'root'@'222.26.224.%' identified by 'root@remote';
master> flush privileges;

配置MHA

1
2
3
4
5
6
# 在mha Manager配置MHA,在Manager主机
shell> mkdir /etc/masterha /var/log/masterha /var/log/masterha/app1
shell> pwd
/usr/local/src/mha4mysql-manager-0.56
shell> cp samples/conf/app1.cnf /etc/masterha/

配置MHA Manager app1.cnf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
[server default]
manager_workdir=/var/log/masterha/app1.log
manager_log=/var/log/masterha/app1/manager.log
master_binlog_dir=/usr/local/mysql/mysql-files/
#master_ip_failover_script= /usr/local/bin/master_ip_failover
master_ip_online_change_script= /usr/local/bin/master_ip_online_change
password=root@remote
user=root
ping_interval=1
remote_workdir=/tmp
repl_password=slave
repl_user=slave
report_script=/usr/local/bin/send_report
secondary_check_script= /usr/local/bin/masterha_secondary_check -s 222.26.224.237 -s 222.26.224.238 --user=root --master_host=222.26.224.238 --master_ip=222.26.224.238 --master_port=3306
shutdown_script=""
ssh_user=root
[server1]
hostname=222.26.224.238
port=3306
[server2]
hostname=222.26.224.237
port=3306
candidate_master=1
check_repl_delay=0
[server3]
hostname=222.26.224.236
port=3306

在Slave配置relay log

1
2
slave1> set global relay_log_purge=0;
slave2> set global relay_log_purge=0;

在MANAGER MHA状态检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# SSH状态
shell> masterha_check_ssh --conf=/etc/masterha/app1.cnf
# REPL状态,注意app1.cnf的注释
shell> masterha_check_repl --conf=/etc/masterha/app1.cnf
# MHA RUN status
shell> masterha_check_status --conf=/etc/masterha/app1.cnf
# start mha manager 监控
shell> nohup masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &
# stop mha manager 监控
shell> masterha_stop --conf=/etc/masterha/app1.cnf

配置VIP[自动]

在master和Candicate Master安装keepalived
1
2
3
4
5
6
7
8
9
10
shell> yum install openssl-devel gcc* -y
shell> cd /usr/local/src
shell> wget http://www.keepalived.org/software/keepalived-1.2.23.tar.gz
shell> tar zxvf keepalived-1.2.23.tar.gz
shell> cd keepalived-1.2.23
shell> ./configure --prefix=/usr/local/keepalived
shell> make
shell> make install
shell> mkdir /etc/keepalived
shell> cp /usr/local/keepalived/etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf

Keepalived配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
! Configuration File for keepalived
global_defs {
notification_email {
weiniyangfg@163.com
}
notification_email_from weiniyangfg@163.com
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id MySQL-HA
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 120
advert_int 1
nopreempt
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.200.23
}
}

Keepalived启动

1
2
3
4
5
6
7
8
9
# 在master启动keepalived
shell> cd /usr/local/keepalived
shell> sbin/keepalived --use-file=/etc/keepalived/keepalived.conf
shell> tail -f /var/log/messages
# 在Candicate Master启动keepalived
shell> cd /usr/local/keepalived
shell> sbin/keepalived --use-file=/etc/keepalived/keepalived.conf
shell> tail -f /var/log/messages

通过日志查看状态

Master

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
[root@hadoop03 keepalived]# tail -f /var/log/messages
Jul 14 16:09:07 hadoop03 Keepalived_vrrp[19784]: Registering Kernel netlink reflector
Jul 14 16:09:07 hadoop03 Keepalived_vrrp[19784]: Registering Kernel netlink command channel
Jul 14 16:09:07 hadoop03 Keepalived_vrrp[19784]: Registering gratuitous ARP shared channel
Jul 14 16:09:07 hadoop03 Keepalived_healthcheckers[19783]: Opening file '/etc/keepalived/keepalived.conf'.
Jul 14 16:09:07 hadoop03 Keepalived_vrrp[19784]: Opening file '/etc/keepalived/keepalived.conf'.
Jul 14 16:09:07 hadoop03 Keepalived_vrrp[19784]: Using LinkWatch kernel netlink reflector...
Jul 14 16:09:07 hadoop03 Keepalived_healthcheckers[19783]: Using LinkWatch kernel netlink reflector...
Jul 14 16:09:07 hadoop03 Keepalived_vrrp[19784]: VRRP_Instance(VI_1) Entering BACKUP STATE
Jul 14 16:09:11 hadoop03 Keepalived_vrrp[19784]: VRRP_Instance(VI_1) Transition to MASTER STATE
Jul 14 16:09:12 hadoop03 Keepalived_vrrp[19784]: VRRP_Instance(VI_1) Entering MASTER STATE

Candicate Master

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
Jul 14 16:09:46 hadoop02 Keepalived_healthcheckers[7763]: Registering Kernel netlink reflector
Jul 14 16:09:46 hadoop02 Keepalived_healthcheckers[7763]: Registering Kernel netlink command channel
Jul 14 16:09:46 hadoop02 Keepalived_healthcheckers[7763]: Opening file '/etc/keepalived/keepalived.conf'.
Jul 14 16:09:46 hadoop02 Keepalived_vrrp[7764]: Registering Kernel netlink reflector
Jul 14 16:09:46 hadoop02 Keepalived_vrrp[7764]: Registering Kernel netlink command channel
Jul 14 16:09:46 hadoop02 Keepalived_vrrp[7764]: Registering gratuitous ARP shared channel
Jul 14 16:09:46 hadoop02 Keepalived_vrrp[7764]: Opening file '/etc/keepalived/keepalived.conf'.
Jul 14 16:09:46 hadoop02 Keepalived_vrrp[7764]: Using LinkWatch kernel netlink reflector...
Jul 14 16:09:46 hadoop02 Keepalived_healthcheckers[7763]: Using LinkWatch kernel netlink reflector...
Jul 14 16:09:46 hadoop02 Keepalived_vrrp[7764]: VRRP_Instance(VI_1) Entering BACKUP STATE

MHA Keepalived配置

修改master_ip_failover脚本
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# 在MHA Manager上配置master_ip_failover
#!/usr/bin/env perl
# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
## Note: This is a sample script and is not complete. Modify the script based on your environment.
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
use MHA::DBHelper;
my (
$command, $ssh_user, $orig_master_host,
$orig_master_ip, $orig_master_port, $new_master_host,
$new_master_ip, $new_master_port, $new_master_user,
$new_master_password
);
# new
my $vip = '192.168.200.23';
my $ssh_start_vip = "/etc/init.d/keepalived start";
my $ssh_stop_vip = "/etc/init.d/keepalived stop";
GetOptions(
'command=s' => \$command,
'ssh_user=s' => \$ssh_user,
'orig_master_host=s' => \$orig_master_host,
'orig_master_ip=s' => \$orig_master_ip,
'orig_master_port=i' => \$orig_master_port,
'new_master_host=s' => \$new_master_host,
'new_master_ip=s' => \$new_master_ip,
'new_master_port=i' => \$new_master_port,
'new_master_user=s' => \$new_master_user,
'new_master_password=s' => \$new_master_password,
);
exit &main();
# new
print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
sub main {
if ( $command eq "stop" || $command eq "stopssh" ) {
# $orig_master_host, $orig_master_ip, $orig_master_port are passed.
# If you manage master ip address at global catalog database,
# invalidate orig_master_ip here.
my $exit_code = 1;
eval {
# new
print "Disabling the VIP on old master: $orig_master_host \n";
&stop_vip();
# updating global catalog, etc
$exit_code = 0;
};
if ($@) {
warn "Got Error: $@\n";
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "start" ) {
# all arguments are passed.
# If you manage master ip address at global catalog database,
# activate new_master_ip here.
# You can also grant write access (create user, set read_only=0, etc) here.
my $exit_code = 10;
eval {
my $new_master_handler = new MHA::DBHelper();
# args: hostname, port, user, password, raise_error_or_not
$new_master_handler->connect( $new_master_ip, $new_master_port,
$new_master_user, $new_master_password, 1 );
## Set read_only=0 on the new master
$new_master_handler->disable_log_bin_local();
print "Set read_only=0 on the new master.\n";
$new_master_handler->disable_read_only();
## Creating an app user on the new master
print "Creating app user on the new master..\n";
FIXME_xxx_create_user( $new_master_handler->{dbh} );
$new_master_handler->enable_log_bin_local();
$new_master_handler->disconnect();
## Update master ip on the catalog database, etc
# FIXME_xxx;
#new
print "Enabling the VIP - $vip on the new master - $new_master_host \n";
&start_vip();
$exit_code = 0;
};
if ($@) {
warn $@;
# If you want to continue failover, exit 10.
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "status" ) {
#new
print "Checking the Status of the script.. OK \n";
# do nothing
exit 0;
}
else {
&usage();
exit 1;
}
}
# A simple system call that enable the VIP on the new master
sub start_vip() {
`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIP on the old_master
sub stop_vip() {
return 0 unless ($ssh_user);
`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}

配置MHA

1
2
3
4
5
6
# 在MHA Manager添加master_ip_failover_script
shell> grep master_ip_failover_script /etc/masterha/app1.cnf
master_ip_failover_script= /usr/local/bin/master_ip_failover

MHA状态

1
2
3
4
5
6
7
8
# 查看MHA状态
shell> masterha_check_repl --conf=/etc/masterha/app1.cnf
# 启动MHA
shell> nohup masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &

测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# 安装sysbench
shell> yum install git automake libtool -y
shell> git clone https://github.com/akopytov/sysbench.git
shell> cd sysbench/
shell> ./autogen.sh
shell> ./configure
shell> ./configure -with-mysql-includes=/usr/local/mysql --with-mysql-libs=/usr/local/mysql/lib/
shell> make
shell> make install
shell> export LD_LIBRARY_PATH=/usr/local/mysql/lib/
# Master测试
mysql> create database sbtest;
shell> sysbench --test=/usr/local/src/sysbench/sysbench/tests/db/oltp.lua --oltp-table-size=1000000 --oltp-read-only=off --init-rng=on --num-threads=16 --max-requests=0 --oltp-dist-type=uniform --max-time=1800 --mysql-user=root --mysql-socket=/usr/local/mysql/mysql-files/mysql.sock --mysql-password=root --db-driver=mysql --mysql-table-engine=innodb --oltp-test-mode=complex prepare
# stop one slave io_thread, 另一个slave不停止
shell> mysql -h 127.0.0.1 -proot -uroot --prompt='slave1> '
slave1> stop slave io_thread;
# Master压力测试
shell> sysbench --test=/usr/local/src/sysbench/sysbench/tests/db/oltp.lua --oltp-table-size=1000000 --oltp-read-only=off --init-rng=on --num-threads=16 --max-requests=0 --oltp-dist-type=uniform --max-time=180 --mysql-user=root --mysql-socket=/usr/local/mysql/mysql-files/mysql.sock --mysql-password=root --db-driver=mysql --mysql-table-engine=innodb --oltp-test-mode=complex run
# start stop io_thread's salve's io_thread
slave1> start slave io_thread;
# 停止Master mysqld
shell> pkill -9 mysqld
# 查看MHA日志
shell> cat /var/log/masterha/app1/manager.log

MHA failover日志

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
Fri Jul 15 10:50:00 2016 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away)
Fri Jul 15 10:50:00 2016 - [info] Executing secondary network check script: /usr/local/bin/masterha_secondary_check -s 222.26.224.237 -s 222.26.224.238 --user=root --master_host=222.26.224.238 --master_ip=222.26.224.238 --master_port=3306 --user=root --master_host=222.26.224.238 --master_ip=222.26.224.238 --master_port=3306 --master_user=root --master_password=root@remote --ping_type=SELECT
Fri Jul 15 10:50:00 2016 - [info] Executing SSH check script: exit 0
Fri Jul 15 10:50:00 2016 - [info] HealthCheck: SSH to 222.26.224.238 is reachable.
Monitoring server 222.26.224.237 is reachable, Master is not reachable from 222.26.224.237\. OK.
Monitoring server 222.26.224.238 is reachable, Master is not reachable from 222.26.224.238\. OK.
Fri Jul 15 10:50:00 2016 - [info] Master is not reachable from all other monitoring servers. Failover should start.
Fri Jul 15 10:50:01 2016 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Fri Jul 15 10:50:01 2016 - [warning] Connection failed 2 time(s)..
Fri Jul 15 10:50:02 2016 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Fri Jul 15 10:50:02 2016 - [warning] Connection failed 3 time(s)..
Fri Jul 15 10:50:03 2016 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Fri Jul 15 10:50:03 2016 - [warning] Connection failed 4 time(s)..
Fri Jul 15 10:50:03 2016 - [warning] Master is not reachable from health checker!
Fri Jul 15 10:50:03 2016 - [warning] Master 222.26.224.238(222.26.224.238:3306) is not reachable!
Fri Jul 15 10:50:03 2016 - [warning] SSH is reachable.
Fri Jul 15 10:50:03 2016 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/masterha/app1.cnf again, and trying to connect to all servers to check server status..
Fri Jul 15 10:50:03 2016 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Jul 15 10:50:03 2016 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Fri Jul 15 10:50:03 2016 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Fri Jul 15 10:50:04 2016 - [info] GTID failover mode = 1
Fri Jul 15 10:50:04 2016 - [info] Dead Servers:
Fri Jul 15 10:50:04 2016 - [info] 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:04 2016 - [info] Alive Servers:
Fri Jul 15 10:50:04 2016 - [info] 222.26.224.237(222.26.224.237:3306)
Fri Jul 15 10:50:04 2016 - [info] 222.26.224.236(222.26.224.236:3306)
Fri Jul 15 10:50:04 2016 - [info] Alive Slaves:
Fri Jul 15 10:50:04 2016 - [info] 222.26.224.237(222.26.224.237:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:04 2016 - [info] GTID ON
Fri Jul 15 10:50:04 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:04 2016 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Jul 15 10:50:04 2016 - [info] 222.26.224.236(222.26.224.236:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:04 2016 - [info] GTID ON
Fri Jul 15 10:50:04 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:04 2016 - [info] Checking slave configurations..
Fri Jul 15 10:50:04 2016 - [info] Checking replication filtering settings..
Fri Jul 15 10:50:04 2016 - [info] Replication filtering check ok.
Fri Jul 15 10:50:04 2016 - [info] Master is down!
Fri Jul 15 10:50:04 2016 - [info] Terminating monitoring script.
Fri Jul 15 10:50:04 2016 - [info] Got exit code 20 (Master dead).
Fri Jul 15 10:50:04 2016 - [info] MHA::MasterFailover version 0.56.
Fri Jul 15 10:50:04 2016 - [info] Starting master failover.
Fri Jul 15 10:50:04 2016 - [info]
Fri Jul 15 10:50:04 2016 - [info] * Phase 1: Configuration Check Phase..
Fri Jul 15 10:50:04 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] GTID failover mode = 1
Fri Jul 15 10:50:05 2016 - [info] Dead Servers:
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:05 2016 - [info] Checking master reachability via MySQL(double check)...
Fri Jul 15 10:50:05 2016 - [info] ok.
Fri Jul 15 10:50:05 2016 - [info] Alive Servers:
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.237(222.26.224.237:3306)
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.236(222.26.224.236:3306)
Fri Jul 15 10:50:05 2016 - [info] Alive Slaves:
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.237(222.26.224.237:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:05 2016 - [info] GTID ON
Fri Jul 15 10:50:05 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:05 2016 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.236(222.26.224.236:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:05 2016 - [info] GTID ON
Fri Jul 15 10:50:05 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:05 2016 - [info] Starting GTID based failover.
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] ** Phase 1: Configuration Check Phase completed.
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] * Phase 2: Dead Master Shutdown Phase..
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] Forcing shutdown so that applications never connect to the current master..
Fri Jul 15 10:50:05 2016 - [info] Executing master IP deactivation script:
Fri Jul 15 10:50:05 2016 - [info] /usr/local/bin/master_ip_failover --orig_master_host=222.26.224.238 --orig_master_ip=222.26.224.238 --orig_master_port=3306 --command=stopssh --ssh_user=root
Disabling the VIP on old master: 222.26.224.238
bash: /etc/init.d/keepalived: 没有那个文件或目录
Fri Jul 15 10:50:05 2016 - [info] done.
Fri Jul 15 10:50:05 2016 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Fri Jul 15 10:50:05 2016 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] * Phase 3: Master Recovery Phase..
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] The latest binary log file/position on all slaves is mysql-bin.000010:397675666
Fri Jul 15 10:50:05 2016 - [info] Retrieved Gtid Set: 71bbe17b-47fd-11e6-828f-005056be4afb:1-5577
Fri Jul 15 10:50:05 2016 - [info] Latest slaves (Slaves that received relay log files to the latest):
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.236(222.26.224.236:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:05 2016 - [info] GTID ON
Fri Jul 15 10:50:05 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:05 2016 - [info] The oldest binary log file/position on all slaves is mysql-bin.000010:391361615
Fri Jul 15 10:50:05 2016 - [info] Retrieved Gtid Set: 71bbe17b-47fd-11e6-828f-005056be4afb:1-2785
Fri Jul 15 10:50:05 2016 - [info] Oldest slaves:
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.237(222.26.224.237:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:05 2016 - [info] GTID ON
Fri Jul 15 10:50:05 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:05 2016 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] * Phase 3.3: Determining New Master Phase..
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] Searching new master from slaves..
Fri Jul 15 10:50:05 2016 - [info] Candidate masters from the configuration file:
Fri Jul 15 10:50:05 2016 - [info] 222.26.224.237(222.26.224.237:3306) Version=5.7.13-log (oldest major version between slaves) log-bin:enabled
Fri Jul 15 10:50:05 2016 - [info] GTID ON
Fri Jul 15 10:50:05 2016 - [info] Replicating from 222.26.224.238(222.26.224.238:3306)
Fri Jul 15 10:50:05 2016 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Jul 15 10:50:05 2016 - [info] Non-candidate masters:
Fri Jul 15 10:50:05 2016 - [info] Searching from candidate_master slaves which have received the latest relay log events..
Fri Jul 15 10:50:05 2016 - [info] Not found.
Fri Jul 15 10:50:05 2016 - [info] Searching from all candidate_master slaves..
Fri Jul 15 10:50:05 2016 - [info] New master is 222.26.224.237(222.26.224.237:3306)
Fri Jul 15 10:50:05 2016 - [info] Starting master failover..
Fri Jul 15 10:50:05 2016 - [info]
From:
222.26.224.238(222.26.224.238:3306) (current master)
+--222.26.224.237(222.26.224.237:3306)
+--222.26.224.236(222.26.224.236:3306)
To:
222.26.224.237(222.26.224.237:3306) (new master)
+--222.26.224.236(222.26.224.236:3306)
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] * Phase 3.3: New Master Recovery Phase..
Fri Jul 15 10:50:05 2016 - [info]
Fri Jul 15 10:50:05 2016 - [info] Waiting all logs to be applied..
Fri Jul 15 10:50:07 2016 - [info] done.
Fri Jul 15 10:51:09 2016 - [info] Replicating from the latest slave 222.26.224.236(222.26.224.236:3306) and waiting to apply..
Fri Jul 15 10:51:09 2016 - [info] Waiting all logs to be applied on the latest slave..
Fri Jul 15 10:51:09 2016 - [info] Resetting slave 222.26.224.237(222.26.224.237:3306) and starting replication from the new master 222.26.224.236(222.26.224.236:3306)..
Fri Jul 15 10:51:09 2016 - [info] Executed CHANGE MASTER.
Fri Jul 15 10:51:10 2016 - [info] Slave started.
Fri Jul 15 10:51:10 2016 - [info] Waiting to execute all relay logs on 222.26.224.237(222.26.224.237:3306)..

MHA 附加

MHA NODE脚本

1
2
3
4
5
6
7
8
save_binary_logs 保存和复制master的二进制日志
apply_diff_relay_logs 识别差异的中继日志事件并将其差异的事件应用于其他的slave
filter_mysqlbinlog 去除不必要的ROLLBACK事件(MHA已不再使用这个工具)
purge_relay_logs 清除中继日志(不会阻塞SQL线程)

MHA MANAGER脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
masterha_check_ssh 检查MHA的SSH配置状况
masterha_check_repl 检查MySQL复制状况
masterha_manger 启动MHA
masterha_check_status 检测当前MHA运行状态
masterha_master_monitor 检测master是否宕机
masterha_master_switch 控制故障转移(自动或者手动)
masterha_conf_host 添加或删除配置的server信息

app1.cnf解释

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
[server default]
manager_workdir=/var/log/masterha/app1.log # 设置manager的工作目录
manager_log=/var/log/masterha/app1/manager.log # 设置manager的日志
master_binlog_dir=/data/mysql # 设置master 保存binlog的位置,以便MHA可以找到master的日志,我这里的也就是mysql的数据目录
master_ip_failover_script= /usr/local/bin/master_ip_failover # 设置自动failover时候的切换脚本
master_ip_online_change_script= /usr/local/bin/master_ip_online_change # 设置手动切换时候的切换脚本
password=123456 # 设置mysql中root用户的密码,这个密码是前文中创建监控用户的那个密码
user=root # 设置监控用户root
ping_interval=1 # 设置监控主库,发送ping包的时间间隔,默认是3秒,尝试三次没有回应的时候自动进行railover
remote_workdir=/tmp # 设置远端mysql在发生切换时binlog的保存位置
repl_password=123456 # 设置复制用户的密码
repl_user=repl # 设置复制环境中的复制用户名
report_script=/usr/local/send_report # 设置发生切换后发送的报警的脚本
secondary_check_script= /usr/local/bin/masterha_secondary_check -s server03 -s server02 --user=root --master_host=server02 --master_ip=192.168.0.50 --master_port=3306 # 一旦MHA到server02的监控之间出现问题,MHA Manager将会尝试从server03登录到server02
shutdown_script="" # 设置故障发生后关闭故障主机脚本(该脚本的主要作用是关闭主机放在发生脑裂,这里没有使用)
ssh_user=root # 设置ssh的登录用户名
[server1]
hostname=192.168.0.50
port=3306
[server2]
hostname=192.168.0.60
port=3306
candidate_master=1 # 设置为候选master,如果设置该参数以后,发生主从切换以后将会将此从库提升为主库,即使这个主库不是集群中事件最新的slave
check_repl_delay=0 # 默认情况下如果一个slave落后master 100M的relay logs的话,MHA将不会选择该slave作为一个新的master,因为对于这个slave的恢复需要花费很长时间,通过设置check_repl_delay=0,MHA触发切换在选择一个新的master的时候将会忽略复制延时,这个参数对于设置了candidate_master=1的主机非常有用,因为这个候选主在切换的过程中一定是新的master
[server3]
hostname=192.168.0.70
port=3306

参考

MHA介绍和安装

MHA并行复制