Show
added a comment - - edited Review
revno: 4260
revision-id: knielsen at knielsen-hq.org-20140620104939-xdvn985cgmwy3odd
parent: knielsen at knielsen-hq.org-20140619125043-acw2gx7rw90y3vqp
committer: Kristian Nielsen <knielsen at knielsen-hq.org>
branch nick: work-10.0
timestamp: Fri 2014-06-20 12:49:39 +0200
message:
MDEV-4937: sql_slave_skip_counter does not work with GTID
The sql_slave_skip_counter is important to be able to recover replication from
certain errors. Often, an appropriate solution is to set
sql_slave_skip_counter to skip over a problem event. But setting
sql_slave_skip_counter produced an error in GTID mode, with a suggestion to
instead set @@gtid_slave_pos to point past the problem event. This however is
not always possible; for example, in case of an INCIDENT event, that event
does not have any GTID to assign to @@gtid_slave_pos.
With this patch, sql_slave_skip_counter now works in GTID mode the same was as
in non-GTID mode. When set, that many initial events are skipped when the SQL
thread starts, plus as many extra events are needed to completely skip any
partially skipped event group. The GTID position is updated to point past the
skipped event(s).
<cut>
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc 2014-06-19 12:50:43 +0000
+++ b/sql/slave.cc 2014-06-20 10:49:39 +0000
@@ -3521,9 +3521,6 @@ static int exec_relay_log_event(THD* thd
if (opt_gtid_ignore_duplicates)
{
- serial_rgi->current_gtid.domain_id= gev->domain_id;
- serial_rgi->current_gtid.server_id= gev->server_id;
- serial_rgi->current_gtid.seq_no= gev->seq_no;
int res= rpl_global_gtid_slave_state.check_duplicate_gtid
(&serial_rgi->current_gtid, serial_rgi);
if (res < 0)
What was the reason for removing the above assignments?
Was it a bug in the old code ?
(I have to ask as I am not 100% of what the old code did).
@@ -4366,6 +4363,7 @@ pthread_handler_t handle_slave_sql(void
char saved_master_log_name[FN_REFLEN];
my_off_t UNINIT_VAR(saved_log_pos);
my_off_t UNINIT_VAR(saved_master_log_pos);
+ String saved_skip_gtid_pos;
my_off_t saved_skip= 0;
Master_info *mi= ((Master_info*)arg);
Relay_log_info* rli = &mi->rli;
@@ -4571,6 +4569,12 @@ log '%s' at position %s, relay log '%s'
strmake_buf(saved_master_log_name, rli->group_master_log_name);
saved_log_pos= rli->group_relay_log_pos;
saved_master_log_pos= rli->group_master_log_pos;
+ if (mi->using_gtid != Master_info::USE_GTID_NO)
+ {
+ saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '"));
+ rpl_append_gtid_state(&saved_skip_gtid_pos, false);
+ saved_skip_gtid_pos.append(STRING_WITH_LEN("'; "));
+ }
saved_skip= rli->slave_skip_counter;
}
if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
@@ -4594,16 +4598,27 @@ log '%s' at position %s, relay log '%s'
if (saved_skip && rli->slave_skip_counter == 0)
{
+ String tmp;
+ if (mi->using_gtid != Master_info::USE_GTID_NO)
+ {
+ tmp.append(STRING_WITH_LEN(", GTID '"));
+ rpl_append_gtid_state(&tmp, false);
+ tmp.append(STRING_WITH_LEN("'; "));
+ }
If you want to avoid checking if tmp.append() fails, you could
create tmp with a static buffer.
char tmp_buff [MAX_GTID_LENGTH+10] ;
String tmp(tmp_buff, sizeof(tmp_buff), &my_charset_bin);
tmp.length(0);
Same goes for saved_skip_gtid_pos().
sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
"relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
- "master_log_pos='%ld' and new position at "
+ "master_log_pos='%ld'%s and new position at "
"relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
- "master_log_pos='%ld' ",
+ "master_log_pos='%ld'%s ",
(ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
saved_master_log_name, (ulong) saved_master_log_pos,
+ saved_skip_gtid_pos.c_ptr_safe(),
rli->group_relay_log_name, (ulong) rli->group_relay_log_pos,
- rli->group_master_log_name, (ulong) rli->group_master_log_pos);
+ rli->group_master_log_name, (ulong) rli->group_master_log_pos,
+ tmp.c_ptr_safe());
saved_skip= 0;
+ saved_skip_gtid_pos.free();
If you free saved_skip_gtid_pos, it good to also free tmp!
}
if (exec_relay_log_event(thd, rli, serial_rgi))
<cut>
=== modified file 'sql/sys_vars.cc'
--- a/sql/sys_vars.cc 2014-06-09 18:00:23 +0000
+++ b/sql/sys_vars.cc 2014-06-20 10:49:39 +0000
@@ -4287,11 +4287,6 @@ bool update_multi_source_variable(sys_va
static bool update_slave_skip_counter(sys_var *self, THD *thd, Master_info *mi)
{
- if (mi->using_gtid != Master_info::USE_GTID_NO)
- {
- my_error(ER_SLAVE_SKIP_NOT_IN_GTID, MYF(0));
- return true;
- }
Shouldn't you also remove the error ER_SLAVE_SKIP_NOT_IN_GTID from the
error file?
Regards,
Monty
I would say that START SLAVE should produce a fatal error and not start the slave if sql_slave_skip_counter is non-zero. Otherwise, the DBA may be surprised at the outcome.