对于mysql读写分离,网上各种解决方案还是比较多,在这边我选择Maxscale,一是功能能满足需求配置简单,二是我后端的数据全部用的mariadb,跟Maxscale本是一家,搭配起来也方便,这边基本的架构是数据库三个节点,一主两从,使用MHA做高可用,前端用Maxscale做读写分离,Maxscale配合Corosync-Pacemaker做高可用。

这里有一个问题,那就是Mariadb10.3.6不支持super_read_only,所以MHA只能用mha4mysql-0.56,最新版的mha4mysql-0.58支持super_read_only,mysql5.7可以用。

架构图:

###环境简要介绍:

maxscale61.blufly.com   192.168.5.61  MaxScale-master
maxscale62.blufly.com   192.168.5.62  MaxScale-backup
MSVIP:192.168.5.60
db51.blufly.com  192.168.5.51   Master
db52.blufly.com  192.168.5.52   Slave1
db53.blufly.com   192.168.5.53   Slave2
DBVIP:192.168.5.55

#初始化

ntpdate 120.25.108.11
/root/init_system_centos7.sh

#hosts文件配置

cat >> /etc/hosts << EOF
192.168.5.61 maxscale61.blufly.com
192.168.5.62 maxscale62.blufly.com
192.168.5.51 db51.blufly.com
192.168.5.52 db52.blufly.com
192.168.5.53 db53.blufly.com
EOF

###------------------ 一、mysql GTID主从复制 ------------------###

#下载安装包

cd /opt
wget http://mirrors.neusoft.edu.cn/mariadb//mariadb-10.3.6/source/mariadb-10.3.6.tar.gz
wget https://github.com/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2

#安装系统组件

yum install -y cmake ncurses-devel openssl-devel openssl zlib-devel

#卸载系统自带mariadb-libs

rpm -qa|grep mariadb
rpm -e --nodeps mariadb-libs
find / -name "mariadb" -exec rm -rf {} \;
rm -rf /etc/my.cnf /etc/my.cnf.d/

#、安装mariadb

#安装jemalloc内存优化工具

cd /opt
tar -jxvf jemalloc-5.1.0.tar.bz2
cd jemalloc-5.1.0
./configure --prefix=/usr/local/jemalloc --libdir=/usr/local/lib
make;make install
echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf
ldconfig
cd ../
cd /opt
mkdir -p /usr/local/mysql
mkdir -p /data/mysql
mkdir -p /data/mysql/binlog
groupadd -r mysql
useradd -g mysql -r -s /sbin/nologin -d /data/mysql/ mysql
chown mysql.mysql -R /data/mysql
chown mysql.mysql -R /usr/local/mysql
mkdir -p /var/log/mariadb
mkdir -p /var/run/mariadb
chown mysql -R /var/run/mariadb
chown mysql -R /var/log/mariadb
tar -zxvf mariadb-10.3.6.tar.gz
cd mariadb-10.3.6
切记:如果之前编译有错误,需要重新编译,请删除CMakeCache.txt
rm -rf CMakeCache.txt
# Configure the MariaDB source tree. Option names must be spelled exactly:
# cmake silently ignores unknown -D variables, so a misspelled option leaves
# the build on its defaults. WITH_SSL/WITH_ZLIB=system rely on the
# openssl-devel and zlib-devel packages installed earlier in this guide.
cmake . -DCMAKE_INSTALL_PREFIX=/usr/local/mysql \
-DMYSQL_DATADIR=/data/mysql/ \
-DSYSCONFDIR=/etc \
-DWITHOUT_TOKUDB=1 \
-DWITH_INNOBASE_STORAGE_ENGINE=1 \
-DWITH_ARCHIVE_STORAGE_ENGINE=1 \
-DWITH_BLACKHOLE_STORAGE_ENGINE=1 \
-DMYSQL_TCP_PORT=9106 \
-DWITH_READLINE=1 \
-DWITH_SSL=system \
-DWITH_ZLIB=system \
-DWITH_LIBWRAP=0 \
-DMYSQL_UNIX_ADDR=/tmp/mysql.sock \
-DDEFAULT_CHARSET=utf8 \
-DDEFAULT_COLLATION=utf8_general_ci \
-DWITH_EXTRA_CHARSETS=all \
-DCMAKE_EXE_LINKER_FLAGS='-ljemalloc' \
-DWITH_SAFEMALLOC=OFF
make && make install

#初始化MariaDB

cd /usr/local/mysql/
./scripts/mysql_install_db --user=mysql --datadir=/data/mysql/

#创建启动脚本

cp support-files/mysql.server /etc/rc.d/init.d/mysqld
chmod +x /etc/init.d/mysqld

##------ 设置my.cnf(master)-------##

[client]
port = 9106
socket=/tmp/mysql.sock
[mysql]
#no-auto-rehash
auto-rehash
[mysqld]
port = 9106
datadir=/data/mysql
socket=/tmp/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
#*** network ***
back_log = 512
max_connections = 3000
max_connect_errors = 6000
table_open_cache = 1024
max_allowed_packet = 32M
# *** global cache ***
read_buffer_size = 8M
read_rnd_buffer_size = 64M
sort_buffer_size = 16M
join_buffer_size = 16M
# *** thread ***
thread_cache_size = 300
thread_stack = 512K
# *** query  cache ***
query_cache_size = 128M
query_cache_limit = 4M
# *** index ***
ft_min_word_len = 8
default-storage-engine = INNODB
transaction_isolation = REPEATABLE-READ
# *** tmp table ***
tmp_table_size = 256M
max_heap_table_size = 256M
# *** slow query log ***
slow_query_log
long_query_time = 10
log-queries-not-using-indexes
#*** MyISAM Specific options
key_buffer_size = 256M
read_buffer_size = 1M
read_rnd_buffer_size = 16M
bulk_insert_buffer_size = 256M
myisam_sort_buffer_size = 256M
myisam_max_sort_file_size = 10G
myisam_repair_threads = 1
myisam_recover_options=force,backup
# *** INNODB Specific options ***
#innodb_additional_mem_pool_size = 64M
innodb_buffer_pool_size = 512M
innodb_data_file_path = ibdata1:10M:autoextend
#innodb_data_home_dir = <directory>
innodb_write_io_threads = 8
innodb_read_io_threads = 8
#innodb_force_recovery=1
innodb_thread_concurrency = 16
innodb_flush_log_at_trx_commit = 2
#innodb_fast_shutdown
innodb_log_buffer_size = 16M
innodb_log_file_size = 256M
innodb_log_files_in_group = 3
#innodb_log_group_home_dir
innodb_max_dirty_pages_pct = 90
#innodb_flush_method=O_DSYNC
innodb_lock_wait_timeout = 120
# 禁止MySQL对外部连接进行DNS解析
skip-name-resolve
lower_case_table_names = 1
#设置服务器ID
server-id= 51
# 设置同步数据库
#binlog-do-db = blufly
#不对mysql库进行日志记录操作
binlog-ignore-db=test,information_schema,performance_schema
#不对test进行复制操作
replicate-ignore-db=test,information_schema,performance_schema
# 打开日志
#binlog日志格式,mysql默认采用statement,建议使用mixed
binlog_format=mixed
#binlog日志文件
log-bin=/data/mysql/binlog/mysql-bin.log
log-bin-index=/data/mysql/binlog/mysql-bin.index
#binlog过期清理时间
expire_logs_days=30
#binlog每个日志文件大小
max-binlog-size=100M
#binlog缓存大小
binlog_cache_size=4M
#最大binlog缓存大小
max_binlog_cache_size=10M
binlog_stmt_cache_size=2M
log-slave-updates=true
master-info-repository=TABLE
relay-log-info-repository=TABLE
sync-master-info=1
slave-parallel-threads=2
binlog-checksum=CRC32
master-verify-checksum=1
slave-sql-verify-checksum=1
binlog-rows-query-log_events=1
[mysqldump]
quick
max_allowed_packet = 32M
[myisamchk]
key_buffer_size = 2048M
sort_buffer_size = 2048M
read_buffer = 32M
write_buffer = 32M
[mysqlhotcopy]
interactive-timeout
[mysqld_safe]
open-files-limit = 10240
log-error=/var/log/mariadb/mariadb.log
pid-file=/var/run/mariadb/mariadb.pid

##-------- 设置my.cnf(slave52)------##

##-------- 设置my.cnf(slave53)------##

#两台slave的配置内容相同,仅server-id不同(db52为52,db53为53),下面以db52为例

[client]
port = 9106
socket=/tmp/mysql.sock
[mysql]
#no-auto-rehash
auto-rehash
[mysqld]
port = 9106
datadir=/data/mysql
socket=/tmp/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
#*** network ***
back_log = 512
max_connections = 3000
max_connect_errors = 6000
table_open_cache = 1024
max_allowed_packet = 32M
# *** global cache ***
read_buffer_size = 8M
read_rnd_buffer_size = 64M
sort_buffer_size = 16M
join_buffer_size = 16M
# *** thread ***
thread_cache_size = 300
thread_stack = 512K
# *** query  cache ***
query_cache_size = 128M
query_cache_limit = 4M
# *** index ***
ft_min_word_len = 8
default-storage-engine = INNODB
transaction_isolation = REPEATABLE-READ
# *** tmp table ***
tmp_table_size = 256M
max_heap_table_size = 256M
# *** slow query log ***
slow_query_log
long_query_time = 10
log-queries-not-using-indexes
#*** MyISAM Specific options
key_buffer_size = 256M
read_buffer_size = 1M
read_rnd_buffer_size = 16M
bulk_insert_buffer_size = 256M
myisam_sort_buffer_size = 256M
myisam_max_sort_file_size = 10G
myisam_repair_threads = 1
myisam_recover_options=force,backup
# *** INNODB Specific options ***
#innodb_additional_mem_pool_size = 64M
innodb_buffer_pool_size = 512M
innodb_data_file_path = ibdata1:10M:autoextend
#innodb_data_home_dir = <directory>
innodb_write_io_threads = 8
innodb_read_io_threads = 8
#innodb_force_recovery=1
innodb_thread_concurrency = 16
innodb_flush_log_at_trx_commit = 2
#innodb_fast_shutdown
innodb_log_buffer_size = 16M
innodb_log_file_size = 256M
innodb_log_files_in_group = 3
#innodb_log_group_home_dir
innodb_max_dirty_pages_pct = 90
#innodb_flush_method=O_DSYNC
innodb_lock_wait_timeout = 120
# 禁止MySQL对外部连接进行DNS解析
skip-name-resolve
lower_case_table_names = 1
#设置服务器ID
server-id= 52
# 设置同步数据库
#binlog-do-db = blufly
#不对test进行复制操作
replicate-ignore-db=test,information_schema,performance_schema
# 打开日志
binlog-format=mixed
relay-log=/data/mysql/binlog/mysql-relay-bin.log
relay-log-index=/data/mysql/binlog/mysql-relay-bin.index
max_relay_log_size=100M
expire_logs_days=30
log-slave-updates=true
master-info-repository=TABLE
relay-log-info-repository=TABLE
sync-master-info=1
slave-parallel-threads=2
binlog-checksum=CRC32
master-verify-checksum=1
slave-sql-verify-checksum=1
binlog-rows-query-log_events=1
[mysqldump]
quick
max_allowed_packet = 32M
[myisamchk]
key_buffer_size = 2048M
sort_buffer_size = 2048M
read_buffer = 32M
write_buffer = 32M
[mysqlhotcopy]
interactive-timeout
[mysqld_safe]
open-files-limit = 10240
log-error=/var/log/mariadb/mariadb.log
pid-file=/var/run/mariadb/mariadb.pid

##-----------------------------##

#启动mariadb

/etc/rc.d/init.d/mysqld start
/sbin/chkconfig --add mysqld
/sbin/chkconfig --level 2345 mysqld on
ln -s /usr/local/mysql/bin/mysql /sbin/mysql
ln -s /usr/local/mysql/bin/mysqladmin /sbin/mysqladmin
ln -s /usr/local/mysql/bin/mysqlbinlog /sbin/mysqlbinlog

#设置环境变量

echo 'export PATH=$PATH:/usr/local/mysql/bin' > /etc/profile.d/mysql.sh
chmod +x /etc/profile.d/mysql.sh
source /etc/profile.d/mysql.sh

#修改修改root密码,添加新用户

mysqladmin -u root password '753951'
mysql -uroot -p753951 -P9106
#授权一个网段
mysql> grant all on *.* to 'blufly'@'192.168.5.%' identified by '852741';
mysql> flush privileges;
mysql> exit;
/etc/rc.d/init.d/mysqld restart

#查看jemalloc是否生效

yum install lsof -y
lsof -n | grep jemalloc

#在master上创建有复制权限的帐号

mysql -uroot -p
grant replication slave,replication client on *.* to "repl"@'192.168.5.%' identified by 'qazqwe1688';
flush privileges;
exit;

#数据库备份还原

#master备份(db51):

mysqldump -uroot -p753951 --all-databases --lock-all-tables --flush-logs --master-data=2 > /opt/mysql20180905.sql
scp -P 65535 /opt/mysql20180905.sql 192.168.5.52:/opt/mysql20180905.sql
scp -P 65535 /opt/mysql20180905.sql 192.168.5.53:/opt/mysql20180905.sql

#slave还原(db52、db53):

mysql -uroot -p753951 < /opt/mysql20180905.sql

#因为master数据库是全备,包括了mysql授权表,所以在slave上还原数据库后,要重新刷新一下授权表

mysql -uroot -p
flush privileges;

#获取master状态值(db51)

mysql -uroot -p
MariaDB [(none)]> show master status;
+------------------+----------+--------------+--------------------------------------------------+
| File             | Position | Binlog_Do_DB | Binlog_Ignore_DB                                 |
+------------------+----------+--------------+--------------------------------------------------+
| mysql-bin.000010 |      358 |              | mysql,test,information_schema,performance_schema |
+------------------+----------+--------------+--------------------------------------------------+
1 row in set (0.000 sec)

#用BINLOG_GTID_POS()函数来计算GTID,参数取show master status输出的File和Position(本文两段示例输出截取自不同时间点,数值并不对应)

MariaDB [(none)]> SELECT BINLOG_GTID_POS("mysql-bin.000003", 727);
+------------------------------------------+
| BINLOG_GTID_POS("mysql-bin.000003", 727) |
+------------------------------------------+
| 0-51-430                                 |
+------------------------------------------+
1 row in set (0.007 sec)

#SLAVE可以通过设置 @@gtid_slave_pos 的值来设定复制的起始位置,用 CHANGE MASTER 把这个值传给主库(db52、db53)

mysql -uroot -p
SET GLOBAL gtid_slave_pos = "0-51-430";
CHANGE MASTER TO MASTER_HOST='192.168.5.51',MASTER_USER='repl',MASTER_PASSWORD='qazqwe1688',MASTER_PORT=9106,MASTER_USE_GTID=slave_pos;

#在slave上查看同步状态

MariaDB [mysql]> start slave;
MariaDB [mysql]> show slave status\G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.5.51
Master_User: repl
Master_Port: 9106
Connect_Retry: 60
Master_Log_File: mysql-bin.000003
Read_Master_Log_Pos: 40778
Relay_Log_File: mysql-relay-bin.000002
Relay_Log_Pos: 40778
Relay_Master_Log_File: mysql-bin.000003
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB: test,mysql,information_schema,performance_schema
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 40778
Relay_Log_Space: 41087
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 51
Master_SSL_Crl:
Master_SSL_Crlpath:
Using_Gtid: Slave_Pos
Gtid_IO_Pos: 0-51-464
Replicate_Do_Domain_Ids:
Replicate_Ignore_Domain_Ids:
Parallel_Mode: conservative
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
1 row in set (0.000 sec)

#将两台slave服务器设置read_only,从库对外提供读服务,之所以没有写进配置文件,是因为随时slave会提升为master

mysql -uroot -p753951 -e "set global read_only=1"

###--------------------- 二、MySQL高可用之MHA ----------------------###

#Checking if super_read_only is defined and turned on..DBD::mysql::st execute failed: Unknown system variable 'super_read_only'

#Mariadb10.3.6不支持super_read_only只能用mha4mysql-0.56

#mysql5.7支持super_read_only,可以配合mha4mysql-0.58

#show variables like ‘super_read_only’;

#--------------------------------------------------------#

#在所有节点都要安装MHA node所需的perl模块(DBD:mysql)(db51、db52、db53)

rpm -vih http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-11.noarch.rpm
yum -y install perl-DBD-MySQL perl-devel perl-CPAN

#在所有的节点安装mha node

cd /opt
tar -zxvf mha4mysql-node-0.56.tar.gz
cd mha4mysql-node-0.56
perl Makefile.PL
make && make install

#配置相互SSH登录无密码验证

#DB51

[root@db51 ~]# ssh-keygen -t rsa
[root@db51 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.51
[root@db51 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.52
[root@db51 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.53

#DB52

[root@db52 ~]# ssh-keygen -t rsa
[root@db52 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.51
[root@db52 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.52
[root@db52 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.53

#DB53

[root@db53 ~]# ssh-keygen -t rsa
[root@db53 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.51
[root@db53 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.52
[root@db53 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.53

#无密码登录测试

ssh -p 65535 root@192.168.5.51

#把DB53作为MHA Manager

[root@db53 ~]# cd /opt
#首先安装MHA Manager依赖的perl模块
[root@db53 ~]# yum install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes -y
[root@db53 ~]# tar -zxvf mha4mysql-manager-0.56.tar.gz
[root@db53 ~]# cd mha4mysql-manager-0.56
[root@db53 ~]# perl Makefile.PL
[root@db53 ~]# make && make install

#配置MHA

[root@db53 ~]# mkdir -p /etc/masterha
[root@db53 ~]# mkdir -p /var/log/masterha
[root@db53 ~]# cp samples/conf/app1.cnf /etc/masterha/

#编辑app1.cnf配置文件

[root@db53 ~]# cat /etc/masterha/app1.cnf
[server default]
manager_workdir=/var/log/masterha/app1
manager_log=/var/log/masterha/app1/manager.log
master_binlog_dir=/data/mysql/binlog
#master_ip_failover_script=/usr/local/bin/master_ip_failover
#master_ip_online_change_script=/usr/local/bin/master_ip_online_change
password=852741
user=blufly
ping_interval=1
remote_workdir=/data/mysql/binlog
repl_password=qazqwe1688
repl_user=repl
report_script=/usr/local/bin/send_report
secondary_check_script= /usr/local/bin/masterha_secondary_check -s 192.168.5.51 -s 192.168.5.52
shutdown_script=""
ssh_user=root
ssh_port=65535
[server1]
hostname=192.168.5.51
port=9106
[server2]
hostname=192.168.5.52
port=9106
candidate_master=1
check_repl_delay=0
[server3]
hostname=192.168.5.53
port=9106

#故障邮件报警脚本

[root@db53 ~]# cat /usr/local/bin/send_report
#!/usr/bin/perl
#  Copyright (C) 2011 DeNA Co.,Ltd.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#  Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
## Note: This is a sample script and is not complete. Modify the script based on your environment.
#
# Mails the subject/body received on the command line through an
# authenticated SMTP account. It is referenced as report_script in
# /etc/masterha/app1.cnf.
use strict;
use warnings FATAL => 'all';
use Mail::Sender;
use Getopt::Long;

# new_master_host and new_slave_hosts are set only when recovering master succeeded
my ( $dead_master_host, $new_master_host, $new_slave_hosts, $subject, $body );

# SMTP account used to send the report; replace with real values.
my $smtp      = 'smtp.163.com';
my $mail_from = '1234567@163.com';
my $mail_user = '1234567@163.com';
my $mail_pass = '1234567';
my $mail_to   = ['1234567@163.com'];

GetOptions(
    'orig_master_host=s' => \$dead_master_host,
    'new_master_host=s'  => \$new_master_host,
    'new_slave_hosts=s'  => \$new_slave_hosts,
    'subject=s'          => \$subject,
    'body=s'             => \$body,
);

mailToContacts( $smtp, $mail_from, $mail_user, $mail_pass, $mail_to, $subject, $body );

# Sends one plain-text UTF-8 message via Mail::Sender using LOGIN auth.
# The SMTP dialogue is written to /tmp/monitormail.log (truncated each run).
# Arguments: smtp host, from address, auth id, auth password,
#            recipient list (array ref), subject, message text.
# Returns 1; a failed send only prints $Mail::Sender::Error.
sub mailToContacts {
    my ( $smtp_host, $from_addr, $auth_id, $auth_pwd, $recipients, $mail_subject, $mail_text ) = @_;

    open my $debug_fh, "> /tmp/monitormail.log"
      or die "Can't open the debug      file:$!\n";

    my $mailer = Mail::Sender->new(
        {
            ctype       => 'text/plain; charset=utf-8',
            encoding    => 'utf-8',
            smtp        => $smtp_host,
            from        => $from_addr,
            auth        => 'LOGIN',
            TLS_allowed => '0',
            authid      => $auth_id,
            authpwd     => $auth_pwd,
            to          => $recipients,
            subject     => $mail_subject,
            debug       => $debug_fh
        }
    );

    $mailer->MailMsg(
        {
            msg   => $mail_text,
            debug => $debug_fh
        }
    ) or print $Mail::Sender::Error;

    return 1;
}

# Do whatever you want here
exit 0;

#在每个slave节点上设置relay log的清除方式

#db52

[root@db52 ~]# mysql -uroot -p753951 -e "set global relay_log_purge=0"

#db53

[root@db53 ~]# mysql -uroot -p753951 -e "set global relay_log_purge=0"

#在每个slave节点上设置定期清理relay脚本

[root@db52 ~]# cat /root/purge_relay_log.sh
#!/bin/bash
# Purges relay logs on this slave with MHA's purge_relay_logs tool and
# appends the tool's output to a log file. Meant to be run from cron.
user=blufly
passwd=852741
port=9106
log_dir='/data/masterha/log'
work_dir='/data'
purge='/usr/local/bin/purge_relay_logs'

# Create the log directory on first run.
if [ ! -d "$log_dir" ]; then
  mkdir -p "$log_dir"
fi

# --disable_relay_log_purge lets the tool toggle relay_log_purge itself while
# it removes old files (see the sample run output later in this guide).
"$purge" --user="$user" --password="$passwd" --disable_relay_log_purge \
  --port="$port" --workdir="$work_dir" >> "$log_dir/purge_relay_logs.log" 2>&1

#设置计划任务(db52、db53)

# /etc/crontab is the system crontab: each entry needs a user field between
# the schedule and the command, otherwise "/bin/bash" is read as the user name.
echo "0 3 * * * root /bin/bash /root/purge_relay_log.sh > /dev/null 2>&1" >> /etc/crontab

#手动执行purge_relay_logs脚本删除中继日志

[root@db52 ~]# purge_relay_logs --user=root --password=753951 --port=9106 -disable_relay_log_purge --workdir=/data/
2018-09-06 15:50:26: purge_relay_logs script started.Found relay_log.info: /data/mysql/relay-log.infoOpening /data/mysql/binlog/mysql-relay-bin.000003 ..Opening /data/mysql/binlog/mysql-relay-bin.000004 ..Executing SET GLOBAL relay_log_purge=1; FLUSH LOGS; sleeping a few seconds so that SQL thread can delete older relay log files (if it keeps up); SET GLOBAL relay_log_purge=0; .. ok.
2018-09-06 15:50:29: All relay log purging operations succeeded.

#在Monitor监控节点上检查SSH配置(db53)

#先注释掉app1.cnf中的这一行:master_ip_failover_script=/usr/local/bin/master_ip_failover

[root@db53 ~]# masterha_check_ssh --conf=/etc/masterha/app1.cnf
Thu Sep  6 16:13:59 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Thu Sep  6 16:13:59 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Thu Sep  6 16:13:59 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Thu Sep  6 16:13:59 2018 - [info] Starting SSH connection tests..
Thu Sep  6 16:14:01 2018 - [debug]
Thu Sep  6 16:13:59 2018 - [debug]  Connecting via SSH from root@192.168.5.51(192.168.5.51:65535) to root@192.168.5.52(192.168.5.52:65535)..
Thu Sep  6 16:14:00 2018 - [debug]   ok.
Thu Sep  6 16:14:00 2018 - [debug]  Connecting via SSH from root@192.168.5.51(192.168.5.51:65535) to root@192.168.5.53(192.168.5.53:65535)..
Thu Sep  6 16:14:01 2018 - [debug]   ok.
Thu Sep  6 16:14:01 2018 - [debug]
Thu Sep  6 16:13:59 2018 - [debug]  Connecting via SSH from root@192.168.5.52(192.168.5.52:65535) to root@192.168.5.51(192.168.5.51:65535)..
Thu Sep  6 16:14:00 2018 - [debug]   ok.
Thu Sep  6 16:14:00 2018 - [debug]  Connecting via SSH from root@192.168.5.52(192.168.5.52:65535) to root@192.168.5.53(192.168.5.53:65535)..
Thu Sep  6 16:14:01 2018 - [debug]   ok.
Thu Sep  6 16:14:02 2018 - [debug]
Thu Sep  6 16:14:00 2018 - [debug]  Connecting via SSH from root@192.168.5.53(192.168.5.53:65535) to root@192.168.5.51(192.168.5.51:65535)..
Thu Sep  6 16:14:01 2018 - [debug]   ok.
Thu Sep  6 16:14:01 2018 - [debug]  Connecting via SSH from root@192.168.5.53(192.168.5.53:65535) to root@192.168.5.52(192.168.5.52:65535)..
Thu Sep  6 16:14:02 2018 - [debug]   ok.
Thu Sep  6 16:14:02 2018 - [info] All SSH connection tests passed successfully.

#检查mysql主从复制(db53)

masterha_check_repl --conf=/etc/masterha/app1.cnf
[root@db53 /]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Thu Sep  6 19:46:46 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Thu Sep  6 19:46:46 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Thu Sep  6 19:46:46 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Thu Sep  6 19:46:46 2018 - [info] MHA::MasterMonitor version 0.56.
Thu Sep  6 19:46:47 2018 - [info] GTID failover mode = 0
Thu Sep  6 19:46:47 2018 - [info] Dead Servers:
Thu Sep  6 19:46:47 2018 - [info] Alive Servers:
Thu Sep  6 19:46:47 2018 - [info]   192.168.5.51(192.168.5.51:9106)
Thu Sep  6 19:46:47 2018 - [info]   192.168.5.52(192.168.5.52:9106)
Thu Sep  6 19:46:47 2018 - [info]   192.168.5.53(192.168.5.53:9106)
Thu Sep  6 19:46:47 2018 - [info] Alive Slaves:
Thu Sep  6 19:46:47 2018 - [info]   192.168.5.52(192.168.5.52:9106)  Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled
Thu Sep  6 19:46:47 2018 - [info]     Replicating from 192.168.5.51(192.168.5.51:9106)
Thu Sep  6 19:46:47 2018 - [info]     Primary candidate for the new Master (candidate_master is set)
Thu Sep  6 19:46:47 2018 - [info]   192.168.5.53(192.168.5.53:9106)  Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled
Thu Sep  6 19:46:47 2018 - [info]     Replicating from 192.168.5.51(192.168.5.51:9106)
Thu Sep  6 19:46:47 2018 - [info] Current Alive Master: 192.168.5.51(192.168.5.51:9106)
Thu Sep  6 19:46:47 2018 - [info] Checking slave configurations..
Thu Sep  6 19:46:47 2018 - [info] Checking replication filtering settings..
Thu Sep  6 19:46:47 2018 - [info]  binlog_do_db= , binlog_ignore_db= information_schema,mysql,performance_schema,test
Thu Sep  6 19:46:47 2018 - [info]  Replication filtering check ok.
Thu Sep  6 19:46:47 2018 - [info] GTID (with auto-pos) is not supported
Thu Sep  6 19:46:47 2018 - [info] Starting SSH connection tests..
Thu Sep  6 19:46:51 2018 - [info] All SSH connection tests passed successfully.
Thu Sep  6 19:46:51 2018 - [info] Checking MHA Node version..
Thu Sep  6 19:46:51 2018 - [info]  Version check ok.
Thu Sep  6 19:46:51 2018 - [info] Checking SSH publickey authentication settings on the current master..
Thu Sep  6 19:46:52 2018 - [info] HealthCheck: SSH to 192.168.5.51 is reachable.
Thu Sep  6 19:46:52 2018 - [info] Master MHA Node version is 0.56.
Thu Sep  6 19:46:52 2018 - [info] Checking recovery script configurations on 192.168.5.51(192.168.5.51:9106)..
Thu Sep  6 19:46:52 2018 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/binlog --output_file=/data/mysql/binlog/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000004
Thu Sep  6 19:46:52 2018 - [info]   Connecting to root@192.168.5.51(192.168.5.51:65535).. Creating /data/mysql/binlog if not exists..    ok.Checking output directory is accessible or not..ok.Binlog found at /data/mysql/binlog, up to mysql-bin.000004
Thu Sep  6 19:46:53 2018 - [info] Binlog setting check done.
Thu Sep  6 19:46:53 2018 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Thu Sep  6 19:46:53 2018 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host=192.168.5.52 --slave_ip=192.168.5.52 --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info  --relay_dir=/data/mysql/  --slave_pass=xxx
Thu Sep  6 19:46:53 2018 - [info]   Connecting to root@192.168.5.52(192.168.5.52:65535).. Checking slave recovery environment settings..Opening /data/mysql/relay-log.info ... ok.Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003Testing mysql connection and privileges.. done.Testing mysqlbinlog output.. done.Cleaning up test file(s).. done.
Thu Sep  6 19:46:53 2018 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host=192.168.5.53 --slave_ip=192.168.5.53 --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info  --relay_dir=/data/mysql/  --slave_pass=xxx
Thu Sep  6 19:46:53 2018 - [info]   Connecting to root@192.168.5.53(192.168.5.53:65535).. Checking slave recovery environment settings..Opening /data/mysql/relay-log.info ... ok.Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003Testing mysql connection and privileges.. done.Testing mysqlbinlog output.. done.Cleaning up test file(s).. done.
Thu Sep  6 19:46:54 2018 - [info] Slaves settings check done.
Thu Sep  6 19:46:54 2018 - [info]
192.168.5.51(192.168.5.51:9106) (current master)+--192.168.5.52(192.168.5.52:9106)+--192.168.5.53(192.168.5.53:9106)
Thu Sep  6 19:46:54 2018 - [info] Checking replication health on 192.168.5.52..
Thu Sep  6 19:46:54 2018 - [info]  ok.
Thu Sep  6 19:46:54 2018 - [info] Checking replication health on 192.168.5.53..
Thu Sep  6 19:46:54 2018 - [info]  ok.
Thu Sep  6 19:46:54 2018 - [warning] master_ip_failover_script is not defined.
Thu Sep  6 19:46:54 2018 - [warning] shutdown_script is not defined.
Thu Sep  6 19:46:54 2018 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.

#开启MHA Manager监控(db53)

nohup masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &

#slave数据库重启后要执行以下操作(db51、db52)

mysql -uroot -p753951 -e "set global read_only=1"
mysql -uroot -p753951 -e "set global relay_log_purge=0"

#----------- 利用keepalived做VIP切换 -------------#

#(在db51、db52上安装keepalived)

wget http://www.keepalived.org/software/keepalived-2.0.1.tar.gz
yum -y install libnl libnl-devel libnfnetlink-devel
tar -zxvf keepalived-2.0.1.tar.gz
cd keepalived-2.0.1
./configure
make;make install
cp /usr/local/etc/sysconfig/keepalived  /etc/sysconfig/
mkdir /etc/keepalived
cp /usr/local/etc/keepalived/keepalived.conf /etc/keepalived/
cp /usr/local/sbin/keepalived /usr/sbin/
cp /opt/keepalived-2.0.1/keepalived/etc/init.d/keepalived /etc/rc.d/init.d/
chmod +x /etc/rc.d/init.d/keepalived

#主库(db51)

[root@db51 ~]# mv /etc/keepalived/keepalived.conf  /etc/keepalived/keepalived.conf-bak
[root@db51 ~]# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
! Node: db51 (192.168.5.51). One directive per line; a '#' comment runs to
! the end of the line, so directives must never follow a comment on a line.
global_defs {
    notification_email {
        1234567@163.com
    }
    notification_email_from admin@blufly.com
    smtp_server 127.0.0.1
    smtp_connect_timeout 30
    router_id MySQL-HA
    enable_script_security
}
vrrp_script chk_mysql_port {
    # Checks whether the mysql service is running; this could be done in many
    # ways (process check, script, ...). Here a script is used.
    script "/root/chk_mysql.sh"
    interval 2    # run the script every 2 seconds
    weight -5     # a failed check (non-zero exit) lowers the priority by 5
    fall 2        # two consecutive failures count as a real failure
    rise 1        # one success counts as recovered; priority is not modified
    user root
}
vrrp_instance MysqlHA_1 {
    state BACKUP
    interface eno16777984
    virtual_router_id 55
    priority 150
    advert_int 1
    nopreempt
    unicast_src_ip 192.168.5.51    ## local IP
    unicast_peer {
        192.168.5.52               ## peer IP
    }
    authentication {
        auth_type PASS
        auth_pass haha268
    }
    virtual_ipaddress {
        192.168.5.55
    }
    track_script {
        chk_mysql_port
    }
}

#备用主库(db52)

[root@db52 ~]# mv /etc/keepalived/keepalived.conf  /etc/keepalived/keepalived.conf-bak
[root@db52 ~]# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
! Node: db52 (192.168.5.52). One directive per line; a '#' comment runs to
! the end of the line, so directives must never follow a comment on a line.
global_defs {
    notification_email {
        1234567@163.com
    }
    notification_email_from admin@blufly.com
    smtp_server 127.0.0.1
    smtp_connect_timeout 30
    router_id MySQL-HA
    enable_script_security
}
vrrp_script chk_mysql_port {
    # Checks whether the mysql service is running; this could be done in many
    # ways (process check, script, ...). Here a script is used.
    script "/root/chk_mysql.sh"
    interval 2    # run the script every 2 seconds
    weight -5     # a failed check (non-zero exit) lowers the priority by 5
    fall 2        # two consecutive failures count as a real failure
    rise 1        # one success counts as recovered; priority is not modified
    user root
}
vrrp_instance MysqlHA_1 {
    state BACKUP
    interface eno16777984
    virtual_router_id 55
    priority 120
    advert_int 1
    nopreempt
    unicast_src_ip 192.168.5.52    ## local IP
    unicast_peer {
        192.168.5.51               ## peer IP
    }
    authentication {
        auth_type PASS
        auth_pass haha268
    }
    virtual_ipaddress {
        192.168.5.55
    }
    track_script {
        chk_mysql_port
    }
}

#mysql端口检测脚本(db51、db52)

[root@db51 ~]# cat /root/chk_mysql.sh
#!/bin/bash
# Health check for the keepalived vrrp_script: when nothing is listening on
# the MariaDB TCP port, stop keepalived on this node.
port=9106

# Count listening TCP sockets only and anchor the match on ":<port>" followed
# by whitespace, so unix-socket lines and ports such as 19106 cannot match.
listeners=$(netstat -lnt | grep -c ":${port}[[:space:]]")

if [ "${listeners}" -eq 0 ]; then
  /etc/init.d/keepalived stop
fi

#添加执行权限

[root@db51 ~]# chmod +x /root/chk_mysql.sh

##############

#上面两台服务器的keepalived都设置为了BACKUP模式,在keepalived中2种模式,分别是master->backup模式和backup->backup模式。

#这两种模式有很大区别。在master->backup模式下,一旦主库宕机,虚拟ip会自动漂移到从库,当主库修复后,keepalived启动后,

#还会把虚拟ip抢占过来,即使设置了非抢占模式(nopreempt)抢占ip的动作也会发生。在backup->backup模式下,当主库宕机后虚拟ip

#会自动漂移到从库上,当原主库恢复和keepalived服务启动后,并不会抢占新主的虚拟ip,即使是优先级高于从库的优先级别,

#也不会发生抢占。为了减少ip漂移次数,通常是把修复好的主库当做新的备库。

##############

#启动keepalived(db51、db52)

[root@db51 ~]# /etc/init.d/keepalived start
[root@db51 ~]# chkconfig keepalived on
[root@db51 ~]# systemctl list-unit-files|grep keepalived
keepalived.service                            enabled
[root@db52 ~]# /etc/init.d/keepalived start
[root@db52 ~]# chkconfig keepalived on
[root@db52 ~]# systemctl list-unit-files|grep keepalived
keepalived.service                            enabled

#查看VIP

[root@db51 ~]# ip addr|grep 192.168.5.55
inet 192.168.5.55/32 scope global eno16777984

#修改master_ip_failover脚本(db53)

[root@db53 ~]# cat /usr/local/bin/master_ip_failover
#!/usr/bin/env perl
# master_ip_failover: moves the VIP by stopping keepalived on the old master
# and starting it on the new master over ssh. It is referenced as
# master_ip_failover_script in /etc/masterha/app1.cnf.
#
#   --command=stop|stopssh  stop keepalived on --orig_master_host
#   --command=start         start keepalived on --new_master_host
#   --command=status        report OK without touching anything
use strict;
use warnings FATAL => 'all';
use Getopt::Long;

# $ssh_port must be declared here: GetOptions below takes a reference to it,
# and "use strict" rejects an undeclared variable at compile time.
my (
    $command,          $ssh_user,        $ssh_port,
    $orig_master_host, $orig_master_ip,  $orig_master_port,
    $new_master_host,  $new_master_ip,   $new_master_port
);
my $vip           = '192.168.5.55';
my $ssh_start_vip = "/etc/init.d/keepalived start";
my $ssh_stop_vip  = "/etc/init.d/keepalived stop";

GetOptions(
    'command=s'          => \$command,
    'ssh_user=s'         => \$ssh_user,
    'ssh_port=s'         => \$ssh_port,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s'   => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s'  => \$new_master_host,
    'new_master_ip=s'    => \$new_master_ip,
    'new_master_port=i'  => \$new_master_port,
);

exit &main();

# Dispatches on --command. Exit codes: 0 on success, 1 for a failed stop or a
# missing/unknown command, 10 for a failed start.
sub main {
    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

    # Without this guard an absent --command dies on "eq" under FATAL warnings.
    unless ( defined $command ) {
        &usage();
        exit 1;
    }

    if ( $command eq "stop" || $command eq "stopssh" ) {
        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            &stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {
        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            &start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        #`ssh $ssh_user\@cluster1 \" $ssh_start_vip \"`;
        exit 0;
    }
    else {
        &usage();
        exit 1;
    }
}

# Returns "-p <port> " when --ssh_port was supplied, otherwise "" so ssh keeps
# its default port.
# NOTE(review): sshd in this setup listens on 65535; confirm that your MHA
# version passes --ssh_port to this script, otherwise set the port here.
sub ssh_port_opt {
    return defined($ssh_port) ? "-p $ssh_port " : "";
}

# A simple system call that enable the VIP on the new master
sub start_vip() {
    my $port_opt = ssh_port_opt();
    # The ssh result is deliberately not checked (best effort).
    `ssh ${port_opt}$ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}

# A simple system call that disable the VIP on the old_master
sub stop_vip() {
    return 0 unless ($ssh_user);
    my $port_opt = ssh_port_opt();
    # Best effort: the old master may be unreachable when this runs.
    `ssh ${port_opt}$ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}

# Prints the command-line synopsis.
sub usage {
    print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}

#修改master_ip_online_change脚本
[root@db53 ~]# cat /usr/local/bin/master_ip_online_change
#!/usr/bin/env perl
# master_ip_online_change: preamble with module imports, option variables,
# command-line parsing and the hand-off to main(). It is referenced as
# master_ip_online_change_script in /etc/masterha/app1.cnf.
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
use MHA::DBHelper;
use MHA::NodeUtil;
use Time::HiRes qw( sleep gettimeofday tv_interval );
use Data::Dumper;

my $_tstart;
my $_running_interval = 0.1;

# $ssh_user and $ssh_port must be declared here: GetOptions below takes
# references to them, and "use strict" rejects undeclared variables at
# compile time.
my (
    $command,              $ssh_user,             $ssh_port,
    $orig_master_is_new_slave,
    $orig_master_host,     $orig_master_ip,       $orig_master_port,
    $orig_master_user,     $orig_master_password, $orig_master_ssh_user,
    $new_master_host,      $new_master_ip,        $new_master_port,
    $new_master_user,      $new_master_password,  $new_master_ssh_user,
);
my $vip           = '192.168.5.55';
my $ssh_start_vip = "/etc/init.d/keepalived start";
my $ssh_stop_vip  = "/etc/init.d/keepalived stop";

GetOptions(
    'command=s'                => \$command,
    'ssh_user=s'               => \$ssh_user,
    'ssh_port=s'               => \$ssh_port,
    'orig_master_is_new_slave' => \$orig_master_is_new_slave,
    'orig_master_host=s'       => \$orig_master_host,
    'orig_master_ip=s'         => \$orig_master_ip,
    # Previously missing, which left $orig_master_port undefined.
    # NOTE(review): confirm the rest of the script expects this value.
    'orig_master_port=i'       => \$orig_master_port,
    'orig_master_user=s'       => \$orig_master_user,
    'orig_master_password=s'   => \$orig_master_password,
    'new_master_host=s'        => \$new_master_host,
    'new_master_ip=s'          => \$new_master_ip,
    'new_master_port=i'        => \$new_master_port,
    'new_master_user=s'        => \$new_master_user,
    'new_master_password=s'    => \$new_master_password,
);

exit &main();
sub current_time_us {my ( $sec, $microsec ) = gettimeofday();my $curdate = localtime($sec);return $curdate . " " . sprintf( "%06d", $microsec );
}
sub sleep_until {my $elapsed = tv_interval($_tstart);if ( $_running_interval > $elapsed ) {sleep( $_running_interval - $elapsed );}
}
sub get_threads_util {my $dbh                    = shift;my $my_connection_id       = shift;my $running_time_threshold = shift;my $type                   = shift;$running_time_threshold = 0 unless ($running_time_threshold);$type                   = 0 unless ($type);my @threads;my $sth = $dbh->prepare("SHOW PROCESSLIST");$sth->execute();while ( my $ref = $sth->fetchrow_hashref() ) {my $id         = $ref->{Id};my $user       = $ref->{User};my $host       = $ref->{Host};my $command    = $ref->{Command};my $state      = $ref->{State};my $query_time = $ref->{Time};my $info       = $ref->{Info};$info =~ s/^\s*(.*?)\s*$/$1/ if defined($info);next if ( $my_connection_id == $id );next if ( defined($query_time) && $query_time < $running_time_threshold );next if ( defined($command)    && $command eq "Binlog Dump" );next if ( defined($user)       && $user eq "system user" );nextif ( defined($command)&& $command eq "Sleep"&& defined($query_time)&& $query_time >= 1 );if ( $type >= 1 ) {next if ( defined($command) && $command eq "Sleep" );next if ( defined($command) && $command eq "Connect" );}if ( $type >= 2 ) {next if ( defined($info) && $info =~ m/^select/i );next if ( defined($info) && $info =~ m/^show/i );}push @threads, $ref;}return @threads;
}
sub main {if ( $command eq "stop" ) {## Gracefully killing connections on the current master# 1. Set read_only= 1 on the new master# 2. DROP USER so that no app user can establish new connections# 3. Set read_only= 1 on the current master# 4. Kill current queries# * Any database access failure will result in script die.my $exit_code = 1;eval {## Setting read_only=1 on the new master (to avoid accident)my $new_master_handler = new MHA::DBHelper();# args: hostname, port, user, password, raise_error(die_on_error)_or_not$new_master_handler->connect( $new_master_ip, $new_master_port,$new_master_user, $new_master_password, 1 );print current_time_us() . " Set read_only on the new master.. ";$new_master_handler->enable_read_only();if ( $new_master_handler->is_read_only() ) {print "ok.\n";}else {die "Failed!\n";}$new_master_handler->disconnect();# Connecting to the orig master, die if any database error happensmy $orig_master_handler = new MHA::DBHelper();$orig_master_handler->connect( $orig_master_ip, $orig_master_port,$orig_master_user, $orig_master_password, 1 );## Drop application user so that nobody can connect. Disabling per-session binlog beforehand#$orig_master_handler->disable_log_bin_local();#print current_time_us() . " Drpping app user on the orig master..\n";#FIXME_xxx_drop_app_user($orig_master_handler);## Waiting for N * 100 milliseconds so that current connections can exitmy $time_until_read_only = 15;$_tstart = [gettimeofday];my @threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );while ( $time_until_read_only > 0 && $#threads >= 0 ) {if ( $time_until_read_only % 5 == 0 ) {printf
"%s Waiting all running %d threads are disconnected.. (max %d milliseconds)\n",current_time_us(), $#threads + 1, $time_until_read_only * 100;if ( $#threads < 5 ) {print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"foreach (@threads);}}sleep_until();$_tstart = [gettimeofday];$time_until_read_only--;@threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );}## Setting read_only=1 on the current master so that nobody(except SUPER) can writeprint current_time_us() . " Set read_only=1 on the orig master.. ";$orig_master_handler->enable_read_only();if ( $orig_master_handler->is_read_only() ) {print "ok.\n";}else {die "Failed!\n";}## Waiting for M * 100 milliseconds so that current update queries can completemy $time_until_kill_threads = 5;@threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );while ( $time_until_kill_threads > 0 && $#threads >= 0 ) {if ( $time_until_kill_threads % 5 == 0 ) {printf
"%s Waiting all running %d queries are disconnected.. (max %d milliseconds)\n",current_time_us(), $#threads + 1, $time_until_kill_threads * 100;if ( $#threads < 5 ) {print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"foreach (@threads);}}sleep_until();$_tstart = [gettimeofday];$time_until_kill_threads--;@threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );}print "Disabling the VIP on old master: $orig_master_host \n";&stop_vip();     ## Terminating all threadsprint current_time_us() . " Killing all application threads..\n";$orig_master_handler->kill_threads(@threads) if ( $#threads >= 0 );print current_time_us() . " done.\n";#$orig_master_handler->enable_log_bin_local();$orig_master_handler->disconnect();## After finishing the script, MHA executes FLUSH TABLES WITH READ LOCK$exit_code = 0;};if ($@) {warn "Got Error: $@\n";exit $exit_code;}exit $exit_code;}elsif ( $command eq "start" ) {## Activating master ip on the new master# 1. Create app user with write privileges# 2. Moving backup script if needed# 3. Register new master's ip to the catalog database
# We don't return error even though activating updatable accounts/ip failed so that we don't interrupt slaves' recovery.
# If exit code is 0 or 10, MHA does not abortmy $exit_code = 10;eval {my $new_master_handler = new MHA::DBHelper();# args: hostname, port, user, password, raise_error_or_not$new_master_handler->connect( $new_master_ip, $new_master_port,$new_master_user, $new_master_password, 1 );## Set read_only=0 on the new master#$new_master_handler->disable_log_bin_local();print current_time_us() . " Set read_only=0 on the new master.\n";$new_master_handler->disable_read_only();## Creating an app user on the new master#print current_time_us() . " Creating app user on the new master..\n";#FIXME_xxx_create_app_user($new_master_handler);#$new_master_handler->enable_log_bin_local();$new_master_handler->disconnect();## Update master ip on the catalog database, etcprint "Enabling the VIP - $vip on the new master - $new_master_host \n";&start_vip();$exit_code = 0;};if ($@) {warn "Got Error: $@\n";exit $exit_code;}exit $exit_code;}elsif ( $command eq "status" ) {# do nothingexit 0;}else {&usage();exit 1;}
}
# A simple system call that enable the VIP on the new master
sub start_vip() {`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIP on the old_master
sub stop_vip() {return 0  unless  ($ssh_user);`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {print
"Usage: master_ip_online_change --command=start|stop|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --orig_master_user=user --orig_master_password=password --orig_master_ssh_user=sshuser --new_master_host=host --new_master_ip=ip --new_master_port=port --new_master_user=user --new_master_password=password --new_master_ssh_user=sshuser \n";die;
}

#修改app1.cnf,取消master_ip_failover_script注释(db53)

[root@db53 ~]# sed -i 's/\r$//' /usr/local/bin/master_ip_online_change
[root@db53 ~]# sed -i 's/\r$//' /usr/local/bin/master_ip_failover

#在app1.cnf中去掉master_ip_failover_script、master_ip_online_change_script的注释
[root@db53 ~]# grep 'master_ip_failover_script' /etc/masterha/app1.cnf
master_ip_failover_script=/usr/local/bin/master_ip_failover

#检查mysql主从复制(db53)

[root@db53 ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Fri Sep  7 09:53:22 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Sep  7 09:53:22 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Fri Sep  7 09:53:22 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Fri Sep  7 09:53:22 2018 - [info] MHA::MasterMonitor version 0.56.
Fri Sep  7 09:53:23 2018 - [info] GTID failover mode = 0
Fri Sep  7 09:53:23 2018 - [info] Dead Servers:
Fri Sep  7 09:53:23 2018 - [info] Alive Servers:
Fri Sep  7 09:53:23 2018 - [info]   192.168.5.51(192.168.5.51:9106)
Fri Sep  7 09:53:23 2018 - [info]   192.168.5.52(192.168.5.52:9106)
Fri Sep  7 09:53:23 2018 - [info]   192.168.5.53(192.168.5.53:9106)
Fri Sep  7 09:53:23 2018 - [info] Alive Slaves:
Fri Sep  7 09:53:23 2018 - [info]   192.168.5.52(192.168.5.52:9106)  Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Sep  7 09:53:23 2018 - [info]     Replicating from 192.168.5.51(192.168.5.51:9106)
Fri Sep  7 09:53:23 2018 - [info]     Primary candidate for the new Master (candidate_master is set)
Fri Sep  7 09:53:23 2018 - [info]   192.168.5.53(192.168.5.53:9106)  Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Sep  7 09:53:23 2018 - [info]     Replicating from 192.168.5.51(192.168.5.51:9106)
Fri Sep  7 09:53:23 2018 - [info] Current Alive Master: 192.168.5.51(192.168.5.51:9106)
Fri Sep  7 09:53:23 2018 - [info] Checking slave configurations..
Fri Sep  7 09:53:23 2018 - [info] Checking replication filtering settings..
Fri Sep  7 09:53:23 2018 - [info]  binlog_do_db= , binlog_ignore_db= information_schema,mysql,performance_schema,test
Fri Sep  7 09:53:23 2018 - [info]  Replication filtering check ok.
Fri Sep  7 09:53:23 2018 - [info] GTID (with auto-pos) is not supported
Fri Sep  7 09:53:23 2018 - [info] Starting SSH connection tests..
Fri Sep  7 09:53:27 2018 - [info] All SSH connection tests passed successfully.
Fri Sep  7 09:53:27 2018 - [info] Checking MHA Node version..
Fri Sep  7 09:53:28 2018 - [info]  Version check ok.
Fri Sep  7 09:53:28 2018 - [info] Checking SSH publickey authentication settings on the current master..
Fri Sep  7 09:53:28 2018 - [info] HealthCheck: SSH to 192.168.5.51 is reachable.
Fri Sep  7 09:53:29 2018 - [info] Master MHA Node version is 0.56.
Fri Sep  7 09:53:29 2018 - [info] Checking recovery script configurations on 192.168.5.51(192.168.5.51:9106)..
Fri Sep  7 09:53:29 2018 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/binlog --output_file=/data/mysql/binlog/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000004
Fri Sep  7 09:53:29 2018 - [info]   Connecting to root@192.168.5.51(192.168.5.51:65535).. Creating /data/mysql/binlog if not exists..    ok.Checking output directory is accessible or not..ok.Binlog found at /data/mysql/binlog, up to mysql-bin.000004
Fri Sep  7 09:53:29 2018 - [info] Binlog setting check done.
Fri Sep  7 09:53:29 2018 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Fri Sep  7 09:53:29 2018 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host=192.168.5.52 --slave_ip=192.168.5.52 --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info  --relay_dir=/data/mysql/  --slave_pass=xxx
Fri Sep  7 09:53:29 2018 - [info]   Connecting to root@192.168.5.52(192.168.5.52:65535).. Checking slave recovery environment settings..Opening /data/mysql/relay-log.info ... ok.Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003Testing mysql connection and privileges.. done.Testing mysqlbinlog output.. done.Cleaning up test file(s).. done.
Fri Sep  7 09:53:30 2018 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host=192.168.5.53 --slave_ip=192.168.5.53 --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info  --relay_dir=/data/mysql/  --slave_pass=xxx
Fri Sep  7 09:53:30 2018 - [info]   Connecting to root@192.168.5.53(192.168.5.53:65535).. Checking slave recovery environment settings..Opening /data/mysql/relay-log.info ... ok.Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003Testing mysql connection and privileges.. done.Testing mysqlbinlog output.. done.Cleaning up test file(s).. done.
Fri Sep  7 09:53:30 2018 - [info] Slaves settings check done.
Fri Sep  7 09:53:30 2018 - [info]
192.168.5.51(192.168.5.51:9106) (current master)+--192.168.5.52(192.168.5.52:9106)+--192.168.5.53(192.168.5.53:9106)
Fri Sep  7 09:53:30 2018 - [info] Checking replication health on 192.168.5.52..
Fri Sep  7 09:53:30 2018 - [info]  ok.
Fri Sep  7 09:53:30 2018 - [info] Checking replication health on 192.168.5.53..
Fri Sep  7 09:53:30 2018 - [info]  ok.
Fri Sep  7 09:53:30 2018 - [info] Checking master_ip_failover_script status:
Fri Sep  7 09:53:30 2018 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.5.51 --orig_master_ip=192.168.5.51 --orig_master_port=9106  --orig_master_ssh_port=65535
Unknown option: orig_master_ssh_port
IN SCRIPT TEST====/etc/init.d/keepalived stop==/etc/init.d/keepalived start===
Checking the Status of the script.. OK
Fri Sep  7 09:53:30 2018 - [info]  OK.
Fri Sep  7 09:53:30 2018 - [warning] shutdown_script is not defined.
Fri Sep  7 09:53:30 2018 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.

#MHA manager启动脚本

[root@db53 ~]# vi /etc/init.d/mha_manager
#!/bin/sh
# chkconfig: 2345 55 25
# description: Start, stop and query the MHA manager (masterha_manager) for app1.
#
# Usage: mha_manager start|stop|status
# The script's exit status is that of the selected action; an unknown
# action prints the usage text to stderr and yields status 2.

MHA_CONF=/etc/masterha/app1.cnf
MHA_LOG=/var/log/masterha/app1/manager.log

# Launch masterha_manager in the background, detached from the terminal,
# with stdout and stderr written to the manager log (truncated on each start).
start() {
  # The redirect below fails if the log directory does not exist yet.
  mkdir -p "${MHA_LOG%/*}" || return 1
  nohup /usr/local/bin/masterha_manager --conf="$MHA_CONF" \
    --remove_dead_master_conf --ignore_last_failover \
    < /dev/null > "$MHA_LOG" 2>&1 &
}

# Stop the running manager.
stop() {
  /usr/local/bin/masterha_stop --conf="$MHA_CONF"
}

# Report whether the manager is running.
status() {
  /usr/local/bin/masterha_check_status --conf="$MHA_CONF"
}

# Dispatch on the requested action ($1).
main() {
  case "$1" in
    start)
      start
      ;;
    stop)
      stop
      ;;
    status)
      status
      ;;
    *)
      echo "Usage: $0 start|stop|status" >&2
      return 2
      ;;
  esac
}

main "$@"

#添加执行权限

[root@db53 ~]# chmod +x /etc/init.d/mha_manager

#启动MHA manager(db53)

[root@db53 ~]# sed -i 's/\r$//' /etc/init.d/mha_manager
[root@db53 ~]# /etc/init.d/mha_manager start
[root@db53 ~]# chkconfig mha_manager on

#通过MHA Manager监控,查看集群里面现在谁是master

[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf
app1 (pid:18766) is running(0:PING_OK), master:192.168.5.51

#--------- 自动Failover(MHA Manager必须运行)-------#

#在主库上使用sysbench生成测试数据

[root@db51 ~]# yum install sysbench -y

#在主库(db51)上进行sysbench数据生成,在mhatest库下生成10张表,每张表10w条记录

[root@db51 ~]# mysql -uroot -p753951 -e "create database mhatest"
[root@db51 ~]# sysbench /usr/share/sysbench/oltp_write_only.lua --mysql-host=127.0.0.1 --mysql-port=9106 --mysql-user=root --mysql-password=753951 --mysql-socket=/tmp/mysql.sock --mysql-db=mhatest --db-driver=mysql --tables=10 --table_size=100000 --report-interval=10 --threads=30 --time=120 prepare

#停掉slave IO线程,模拟主从延时(db52)

[root@db52 ~]# mysql -uroot -p
MariaDB [(none)]> stop slave io_thread;
Query OK, 0 rows affected (0.08 sec)
MariaDB [(none)]> show slave status\G
*************************** 1. row ***************************Slave_IO_State: Master_Host: 192.168.5.51Master_User: replMaster_Port: 9106Connect_Retry: 60Master_Log_File: mysql-bin.000005Read_Master_Log_Pos: 89202349Relay_Log_File: mysql-relay-bin.000006Relay_Log_Pos: 89202648Relay_Master_Log_File: mysql-bin.000005Slave_IO_Running: NoSlave_SQL_Running: YesReplicate_Do_DB: Replicate_Ignore_DB: test,mysql,information_schema,performance_schemaReplicate_Do_Table: Replicate_Ignore_Table: Replicate_Wild_Do_Table: Replicate_Wild_Ignore_Table: Last_Errno: 0Last_Error: Skip_Counter: 0Exec_Master_Log_Pos: 89202349Relay_Log_Space: 194088288Until_Condition: NoneUntil_Log_File: Until_Log_Pos: 0Master_SSL_Allowed: NoMaster_SSL_CA_File: Master_SSL_CA_Path: Master_SSL_Cert: Master_SSL_Cipher: Master_SSL_Key: Seconds_Behind_Master: NULL
Master_SSL_Verify_Server_Cert: NoLast_IO_Errno: 0Last_IO_Error: Last_SQL_Errno: 0Last_SQL_Error: Replicate_Ignore_Server_Ids: Master_Server_Id: 51Master_SSL_Crl: Master_SSL_Crlpath: Using_Gtid: Slave_PosGtid_IO_Pos: 0-51-922Replicate_Do_Domain_Ids: Replicate_Ignore_Domain_Ids: Parallel_Mode: conservativeSQL_Delay: 0SQL_Remaining_Delay: NULLSlave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
1 row in set (0.000 sec)

#在主库上(db51)进行压力测试,持续时间为3分钟,产生大量的binlog

[root@db51 ~]# sysbench /usr/share/sysbench/oltp_write_only.lua --mysql-host=127.0.0.1 --mysql-port=9106 --mysql-user=root --mysql-password=753951 --mysql-socket=/tmp/mysql.sock --mysql-db=mhatest --db-driver=mysql --tables=10 --table_size=100000 --report-interval=10 --threads=30 --time=180 run

#开启slave(db52)上的IO线程,追赶落后于master的binlog

[root@db52 ~]# mysql -uroot -p
MariaDB [(none)]> start slave io_thread;
Query OK, 0 rows affected (0.00 sec)
MariaDB [(none)]> show slave status\G
*************************** 1. row ***************************Slave_IO_State: Waiting for master to send eventMaster_Host: 192.168.5.51Master_User: replMaster_Port: 9106Connect_Retry: 60Master_Log_File: mysql-bin.000009Read_Master_Log_Pos: 99099077Relay_Log_File: mysql-relay-bin.000009Relay_Log_Pos: 34972386Relay_Master_Log_File: mysql-bin.000006Slave_IO_Running: YesSlave_SQL_Running: YesReplicate_Do_DB: Replicate_Ignore_DB: test,mysql,information_schema,performance_schemaReplicate_Do_Table: Replicate_Ignore_Table: Replicate_Wild_Do_Table: Replicate_Wild_Ignore_Table: Last_Errno: 0Last_Error: Skip_Counter: 0Exec_Master_Log_Pos: 34972087Relay_Log_Space: 623421793Until_Condition: NoneUntil_Log_File: Until_Log_Pos: 0Master_SSL_Allowed: NoMaster_SSL_CA_File: Master_SSL_CA_Path: Master_SSL_Cert: Master_SSL_Cipher: Master_SSL_Key: Seconds_Behind_Master: 245
Master_SSL_Verify_Server_Cert: NoLast_IO_Errno: 0Last_IO_Error: Last_SQL_Errno: 0Last_SQL_Error: Replicate_Ignore_Server_Ids: Master_Server_Id: 51Master_SSL_Crl: Master_SSL_Crlpath: Using_Gtid: Slave_PosGtid_IO_Pos: 0-51-520164Replicate_Do_Domain_Ids: Replicate_Ignore_Domain_Ids: Parallel_Mode: conservativeSQL_Delay: 0SQL_Remaining_Delay: NULLSlave_SQL_Running_State: Waiting for room in worker thread event queue
1 row in set (0.000 sec)

MariaDB [mhatest]> show tables;
+-------------------+
| Tables_in_mhatest |
+-------------------+
| sbtest1           |
| sbtest10          |
| sbtest2           |
| sbtest3           |
| sbtest4           |
| sbtest5           |
| sbtest6           |
| sbtest7           |
| sbtest8           |
| sbtest9           |
+-------------------+
10 rows in set (0.000 sec)

MariaDB [mhatest]> select count(*) from sbtest1;
+----------+
| count(*) |
+----------+
|   100000 |
+----------+
1 row in set (0.027 sec)

#可以看到备库的数据已同步过来了

#杀掉主库(db51)mysql进程,模拟主库发生故障,进行自动failover操作

[root@db51 ~]# pkill -9 mysqld

#查看MHA切换日志,了解整个切换过程(db53)

cat /var/log/masterha/app1/manager.log
----- Failover Report -----
app1: MySQL Master failover 192.168.5.51(192.168.5.51:9106) to 192.168.5.52(192.168.5.52:9106) succeeded
Master 192.168.5.51(192.168.5.51:9106) is down!
Check MHA Manager logs at db53.blufly.com:/var/log/masterha/app1/manager.log for details.
Started automated(non-interactive) failover.
Invalidated master IP address on 192.168.5.51(192.168.5.51:9106)
The latest slave 192.168.5.52(192.168.5.52:9106) has all relay logs for recovery.
Selected 192.168.5.52(192.168.5.52:9106) as a new master.
192.168.5.52(192.168.5.52:9106): OK: Applying all logs succeeded.
192.168.5.52(192.168.5.52:9106): OK: Activated master IP address.
192.168.5.53(192.168.5.53:9106): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
192.168.5.53(192.168.5.53:9106): OK: Applying all logs succeeded. Slave started, replicating from 192.168.5.52(192.168.5.52:9106)
192.168.5.52(192.168.5.52:9106): Resetting slave info succeeded.
Master failover to 192.168.5.52(192.168.5.52:9106) completed successfully.

#此时192.168.5.52已经是新的master了

#一旦发生切换,管理进程(Manager)将会退出,无法再次进行测试。需要先修复故障数据库,并通过change master将其重新加入MHA环境,同时保证app1.failover.complete文件不存在,或者加上--ignore_last_failover参数忽略该文件,才能再次开启管理进程。

#--------- 修复宕机的Master作为Slave ---------------------#

#通常情况下自动切换以后,原master可能已经废弃掉,待原master主机修复后,如果数据完整的情况下,可能想把原来master重新作为新主库的slave,这时我们可以借助当时自动切换时刻的MHA日志来完成对原master的修复。下面是提取相关日志的命令:

[root@db53 app1]# grep -i "All other slaves should start" manager.log
Mon Apr 21 22:28:33 2014 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.5.52', MASTER_PORT=9106, MASTER_LOG_FILE='mysql-bin.000008', MASTER_LOG_POS=342, MASTER_USER='repl', MASTER_PASSWORD='qazqwe1688';

#获取上述信息以后,就可以直接在修复后的master上执行change master to相关操作,重新作为从库了。

[root@db51 ~]# /etc/init.d/mysqld start
[root@db51 ~]# /etc/init.d/keepalived start
[root@db51 ~]# mysql -uroot -p753951
MariaDB [(none)]> CHANGE MASTER TO MASTER_HOST='192.168.5.52', MASTER_PORT=9106, MASTER_LOG_FILE='mysql-bin.000008', MASTER_LOG_POS=342, MASTER_USER='repl', MASTER_PASSWORD='qazqwe1688';
MariaDB [(none)]> start slave;
MariaDB [(none)]> show slave status\G

#把原主库设置为只读

[root@db51 ~]# mysql -uroot -p753951 -e "set global read_only=1"
[root@db51 ~]# mysql -uroot -p753951 -e "set global relay_log_purge=0"

#通过MHA Manager监控,查看集群里面现在谁是master

[root@db53 ~]# /etc/init.d/mha_manager start
[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf
app1 (pid:21725) is running(0:PING_OK), master:192.168.5.52

#--------- 手动Failover(MHA Manager必须没有运行)-------#

#关闭MHA Manager(db53)

[root@db53 ~]# /etc/init.d/mha_manager stop
[root@db53 ~]# masterha_master_switch --master_state=dead --conf=/etc/masterha/app1.cnf --dead_master_host=192.168.5.52 --dead_master_port=9106 --new_master_host=192.168.5.51 --new_master_port=9106 --ignore_last_failover

#-------------------- 在线进行切换 ----------------------#

#通过MHA Manager监控,查看集群里面现在谁是master

[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf
app1 (pid:26244) is running(0:PING_OK), master:192.168.5.52

#首先,停掉MHA监控:

[root@db53 ~]# /etc/init.d/mha_manager stop

#查看manager status

[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf
app1 is stopped(2:NOT_RUNNING).

#其次,进行在线切换操作(模拟在线切换主库操作,原主库192.168.5.52变为slave,192.168.5.51提升为新的主库)

[root@db53 ~]# masterha_master_switch --conf=/etc/masterha/app1.cnf --master_state=alive --new_master_host=192.168.5.51 --new_master_port=9106 --orig_master_is_new_slave --running_updates_limit=10000
Fri Sep 21 12:44:52 2018 - [info] MHA::MasterRotate version 0.56.
Fri Sep 21 12:44:52 2018 - [info] Starting online master switch..
Fri Sep 21 12:44:52 2018 - [info]
Fri Sep 21 12:44:52 2018 - [info] * Phase 1: Configuration Check Phase..
Fri Sep 21 12:44:52 2018 - [info]
Fri Sep 21 12:44:52 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Sep 21 12:44:52 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Fri Sep 21 12:44:52 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Fri Sep 21 12:44:53 2018 - [info] GTID failover mode = 0
Fri Sep 21 12:44:53 2018 - [info] Current Alive Master: 192.168.5.52(192.168.5.52:9106)
Fri Sep 21 12:44:53 2018 - [info] Alive Slaves:
Fri Sep 21 12:44:53 2018 - [info]   192.168.5.51(192.168.5.51:9106)  Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Sep 21 12:44:53 2018 - [info]     Replicating from 192.168.5.52(192.168.5.52:9106)
Fri Sep 21 12:44:53 2018 - [info]   192.168.5.53(192.168.5.53:9106)  Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled
Fri Sep 21 12:44:53 2018 - [info]     Replicating from 192.168.5.52(192.168.5.52:9106)
It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.5.52(192.168.5.52:9106)? (YES/no): yes
Fri Sep 21 12:44:55 2018 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Fri Sep 21 12:44:55 2018 - [info]  ok.
Fri Sep 21 12:44:55 2018 - [info] Checking MHA is not monitoring or doing failover..
Fri Sep 21 12:44:55 2018 - [info] Checking replication health on 192.168.5.51..
Fri Sep 21 12:44:55 2018 - [info]  ok.
Fri Sep 21 12:44:55 2018 - [info] Checking replication health on 192.168.5.53..
Fri Sep 21 12:44:55 2018 - [info]  ok.
Fri Sep 21 12:44:55 2018 - [info] 192.168.5.51 can be new master.
Fri Sep 21 12:44:55 2018 - [info]
From:
192.168.5.52(192.168.5.52:9106) (current master)+--192.168.5.51(192.168.5.51:9106)+--192.168.5.53(192.168.5.53:9106)
To:
192.168.5.51(192.168.5.51:9106) (new master)+--192.168.5.53(192.168.5.53:9106)+--192.168.5.52(192.168.5.52:9106)
Starting master switch from 192.168.5.52(192.168.5.52:9106) to 192.168.5.51(192.168.5.51:9106)? (yes/NO): yes
Fri Sep 21 12:44:59 2018 - [info] Checking whether 192.168.5.51(192.168.5.51:9106) is ok for the new master..
Fri Sep 21 12:44:59 2018 - [info]  ok.
Fri Sep 21 12:44:59 2018 - [info] 192.168.5.52(192.168.5.52:9106): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Fri Sep 21 12:44:59 2018 - [info] 192.168.5.52(192.168.5.52:9106): Resetting slave pointing to the dummy host.
Fri Sep 21 12:44:59 2018 - [info] ** Phase 1: Configuration Check Phase completed.
Fri Sep 21 12:44:59 2018 - [info]
Fri Sep 21 12:44:59 2018 - [info] * Phase 2: Rejecting updates Phase..
Fri Sep 21 12:44:59 2018 - [info]
Fri Sep 21 12:44:59 2018 - [info] Executing master ip online change script to disable write on the current master:
Fri Sep 21 12:44:59 2018 - [info]   /usr/local/bin/master_ip_online_change --command=stop --orig_master_host=192.168.5.52 --orig_master_ip=192.168.5.52 --orig_master_port=9106 --orig_master_user='blufly' --orig_master_password='852741' --new_master_host=192.168.5.51 --new_master_ip=192.168.5.51 --new_master_port=9106 --new_master_user='blufly' --new_master_password='852741' --orig_master_ssh_user=root --new_master_ssh_user=root  --orig_master_ssh_port=65535  --new_master_ssh_port=65535 --orig_master_is_new_slave
Unknown option: orig_master_ssh_port
Unknown option: new_master_ssh_port
Fri Sep 21 12:44:59 2018 849902 Set read_only on the new master.. ok.
Fri Sep 21 12:44:59 2018 854742 Set read_only=1 on the orig master.. ok.
Disabling the VIP on old master: 192.168.5.52
ssh: connect to host 192.168.5.52 port 22: Connection refused
Fri Sep 21 12:44:59 2018 865695 Killing all application threads..
Fri Sep 21 12:44:59 2018 865768 done.
Fri Sep 21 12:44:59 2018 - [info]  ok.
Fri Sep 21 12:44:59 2018 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Fri Sep 21 12:44:59 2018 - [info] Executing FLUSH TABLES WITH READ LOCK..
Fri Sep 21 12:44:59 2018 - [info]  ok.
Fri Sep 21 12:44:59 2018 - [info] Orig master binlog:pos is mysql-bin.000008:629044.
Fri Sep 21 12:44:59 2018 - [info]  Waiting to execute all relay logs on 192.168.5.51(192.168.5.51:9106)..
Fri Sep 21 12:44:59 2018 - [info]  master_pos_wait(mysql-bin.000008:629044) completed on 192.168.5.51(192.168.5.51:9106). Executed 0 events.
Fri Sep 21 12:44:59 2018 - [info]   done.
Fri Sep 21 12:44:59 2018 - [info] Getting new master's binlog name and position..
Fri Sep 21 12:44:59 2018 - [info]  mysql-bin.000015:545235
Fri Sep 21 12:44:59 2018 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.5.51', MASTER_PORT=9106, MASTER_LOG_FILE='mysql-bin.000015', MASTER_LOG_POS=545235, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Fri Sep 21 12:44:59 2018 - [info] Executing master ip online change script to allow write on the new master:
Fri Sep 21 12:44:59 2018 - [info]   /usr/local/bin/master_ip_online_change --command=start --orig_master_host=192.168.5.52 --orig_master_ip=192.168.5.52 --orig_master_port=9106 --orig_master_user='blufly' --orig_master_password='852741' --new_master_host=192.168.5.51 --new_master_ip=192.168.5.51 --new_master_port=9106 --new_master_user='blufly' --new_master_password='852741' --orig_master_ssh_user=root --new_master_ssh_user=root  --orig_master_ssh_port=65535  --new_master_ssh_port=65535 --orig_master_is_new_slave
Unknown option: orig_master_ssh_port
Unknown option: new_master_ssh_port
Fri Sep 21 12:45:00 2018 107287 Set read_only=0 on the new master.
Enabling the VIP - 192.168.5.55 on the new master - 192.168.5.51
ssh: connect to host 192.168.5.51 port 22: Connection refused
Fri Sep 21 12:45:00 2018 - [info]  ok.
Fri Sep 21 12:45:00 2018 - [info]
Fri Sep 21 12:45:00 2018 - [info] * Switching slaves in parallel..
Fri Sep 21 12:45:00 2018 - [info]
Fri Sep 21 12:45:00 2018 - [info] -- Slave switch on host 192.168.5.53(192.168.5.53:9106) started, pid: 19526
Fri Sep 21 12:45:00 2018 - [info]
Fri Sep 21 12:45:01 2018 - [info] Log messages from 192.168.5.53 ...
Fri Sep 21 12:45:01 2018 - [info]
Fri Sep 21 12:45:00 2018 - [info]  Waiting to execute all relay logs on 192.168.5.53(192.168.5.53:9106)..
Fri Sep 21 12:45:00 2018 - [info]  master_pos_wait(mysql-bin.000008:629044) completed on 192.168.5.53(192.168.5.53:9106). Executed 0 events.
Fri Sep 21 12:45:00 2018 - [info]   done.
Fri Sep 21 12:45:00 2018 - [info]  Resetting slave 192.168.5.53(192.168.5.53:9106) and starting replication from the new master 192.168.5.51(192.168.5.51:9106)..
Fri Sep 21 12:45:00 2018 - [info]  Executed CHANGE MASTER.
Fri Sep 21 12:45:00 2018 - [info]  Slave started.
Fri Sep 21 12:45:01 2018 - [info] End of log messages from 192.168.5.53 ...
Fri Sep 21 12:45:01 2018 - [info]
Fri Sep 21 12:45:01 2018 - [info] -- Slave switch on host 192.168.5.53(192.168.5.53:9106) succeeded.
Fri Sep 21 12:45:01 2018 - [info] Unlocking all tables on the orig master:
Fri Sep 21 12:45:01 2018 - [info] Executing UNLOCK TABLES..
Fri Sep 21 12:45:01 2018 - [info]  ok.
Fri Sep 21 12:45:01 2018 - [info] Starting orig master as a new slave..
Fri Sep 21 12:45:01 2018 - [info]  Resetting slave 192.168.5.52(192.168.5.52:9106) and starting replication from the new master 192.168.5.51(192.168.5.51:9106)..
Fri Sep 21 12:45:01 2018 - [info]  Executed CHANGE MASTER.
Fri Sep 21 12:45:01 2018 - [info]  Slave started.
Fri Sep 21 12:45:01 2018 - [info] All new slave servers switched successfully.
Fri Sep 21 12:45:01 2018 - [info]
Fri Sep 21 12:45:01 2018 - [info] * Phase 5: New master cleanup phase..
Fri Sep 21 12:45:01 2018 - [info]
Fri Sep 21 12:45:01 2018 - [info]  192.168.5.51: Resetting slave info succeeded.
Fri Sep 21 12:45:01 2018 - [info] Switching master to 192.168.5.51(192.168.5.51:9106) completed successfully.

#--orig_master_is_new_slave 切换时加上此参数是将原 master 变为新 master 的 slave 节点;如果不加此参数,原来的 master 将不会作为 slave 启动

#--running_updates_limit=10000,故障切换时,候选master 如果有延迟的话,mha切换不能成功,加上此参数表示延迟在此时间范围内都可切换(单位为s),但是切换的时间长短是由recover 时relay 日志的大小决定

#注意:在线进行切换需要调用到master_ip_online_change这个脚本

#通过MHA Manager监控,查看集群里面现在谁是master

[root@db53 ~]# /etc/init.d/mha_manager start
[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf
app1 (pid:19644) is running(0:PING_OK), master:192.168.5.51

#master已经切换到192.168.5.51

#查看VIP切换情况

[root@db51 ~]# ip addr|grep 192.168.5.55
    inet 192.168.5.55/32 scope global eno16777984

#MaxScale上进行验证

[root@maxscale61 ~]# maxadmin -S /tmp/maxadmin.sock
MaxScale> list servers
Servers.
-------------------+-----------------+-------+-------------+--------------------
Server             | Address         | Port  | Connections | Status
-------------------+-----------------+-------+-------------+--------------------
server1            | 192.168.5.51    |  9106 |           0 | Master, Running
server2            | 192.168.5.52    |  9106 |           0 | Slave, Running
server3            | 192.168.5.53    |  9106 |           0 | Slave, Running
-------------------+-----------------+-------+-------------+--------------------

至此mysql一主两从的主从同步、MHA高可用已实现,下一步就是实现Maxscale读写分离和Maxscale HA,相关脚本已放到https://github.com/ivehu/mha

mysql高可用方案之主从同步+MHA高可用相关推荐

  1. MQ问题集(kafka主从同步与高可用,MQ重复消费、幂等)

    1.kafka主从同步与高可用 https://1028826685.iteye.com/blog/2354570 http://developer.51cto.com/art/201808/5815 ...

  2. Mysql服务器线上配置主从同步

    我们一般在线上搭建MYSQL都会部署一套主从同步方案: 当master(主)库的数据发生变化的时候,变化会实时的同步到slave(从)库. 主从复制的过程: Mysql同步过程的第一部分就是maste ...

  3. Mysql的高可用方案及主从详细配置

    1.常用的高可用MySQL解决方案: 数据库作为最基础的数据存储服务之一,在存储系统中有着非常重要的地位,因此要求其具备高可用性无可厚非.能实现不同SLA(服务水平协定)的解决方案有很多种,这些方案可 ...

  4. mysql高可用方案之主从架构(master-slave)

    mysql主从在目前企业中很常见的方案之一,主是将DML,DDL,DML语句用mysql dump进程将二进制日志记录,从是用thread_io线程读取主上的制日志存放中继日志中,再用thread_s ...

  5. redis的主从同步及高可用集群

    redis 是一个高性能的 key-value 数据库. redis 的出现,很大程度补偿了 memcached 这类 keyvalue 存储的不足,在部分场合可以对关系数据库起到很 好的补充作用.它 ...

  6. memcache高集群搭建----主从同步实验

    目录 一.环境部署 二.主从服务器搭建 2.1 修改主机名 2.2 上传软件包并解压 2.3 主服务器上安装magent代理 2.4 主从服务器搭建keepalived 2.5 主服务器中keepal ...

  7. Mysql数据备份恢复及主从同步

    目录 Mysql备份及主从同步 Mysql备份 1. mysqldump全量备份 2.log_bin增量备份 Mysql主从同步配置 1.1主从同步过程 1.2 配置 Mysql备份及主从同步 Mys ...

  8. ubuntu mysql混合开发_mysql5.7主从同步 ubuntu

    实现环境: | System | mysql   |  ip   | |主ubuntu | mysql-5.7.24   | 10.192.209.122 | |从ubuntu    | mysql- ...

  9. MySQL(17) 通过Docker搭建主从同步

    一.前言 本文将基于Docker部署2台MySQL服务实现主从同步,即主master用于写(增删改),从slave用来读(查) 二.Docker搭建MySQL实现主从同步 1.master(主)配置 ...

  10. mysql从库显示connecting_Mysql主从同步时Slave_IO_Running:Connecting ; Slave_SQL_Running:Yes的情况故障排除...

    前几天在测试主从服务器Mysql同步时遇到了从数据库显示Slave_IO_Running:Connecting: Slave_SQL_Running:Yes的问题. 下面列举几种可能的错误原因: 1. ...

最新文章

  1. 7. Query Expressions(查询表达式)
  2. linux 临时设置英文输入,Linux 设置 Rime 默认英文状态
  3. tensorflow 对csv数据进行批量获取
  4. Tkinter的Button组件的使用
  5. 开源Delphi:AutoCHM:CHM生成和还原Html工具
  6. 心脏为什么长在左边?原来是因为这个消失的器官
  7. 推荐一些数据挖掘和生信友好的SCI!
  8. HTML限制输入个数,如何使用CSS(或jQuery,如果需要)限制输入HTML输入的字符数?
  9. hdu1233 还是畅通工程 基础最小生成树
  10. [GO]冒泡排序的原理和代码实现
  11. docker安装cboard
  12. 批量提取html文字,批量提取网页内容(全自动)
  13. Latex \TexStudio 设置数学符号
  14. 独自一人开发返利平台小程序日记(准备开源中):万事开头难,既然做了,那就只能咬牙坚持了
  15. 深度装N卡LINUX驱动 性能怎么样,讲解Deepin 20开源Nouveau和闭源NVIDIA驱动,附装闭源N卡驱动的方法...
  16. win7——win server 2012 iis中使用asp程序出现Microsoft OLE DB Provider for ODBC Drivers 错误 '80004005'
  17. js 获取手机型号和版本
  18. c语言模拟走迷宫课程程序设计报告,《C语言程序设计》走迷宫游戏程序设计.doc...
  19. 应对羊毛党的老手段不管用了,但有些公司依然有办法,他们是怎么做的?
  20. Cong! ZOJ 100AC!

热门文章

  1. Android系统在新进程中启动自定义服务过程(startService)的原理分析
  2. 【原创·总结】影响sql查询性能的因素
  3. 汇编语言32位加减乘除运算题
  4. 【python】opencv、PIL、gdal读取tif高分遥感影像比较
  5. 从零基础入门Tensorflow2.0 ----二、4.3 wide deep 模型(多输入)
  6. numpy.cumsum()函数
  7. MATLAB非线性最小二乘lsqnonlin
  8. python 合并与连接
  9. day8--pandas
  10. Windows10内存泄漏,分页池高分析及解决方案