From: Stefano Brivio <sbrivio(a)redhat.com>

Signed-off-by: Stefano Brivio <sbrivio(a)redhat.com>
Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au>
---
 flow.c     | 36 ++++++++++++++++++++++++++++++++++++
 flow.h     |  2 ++
 migrate.c  |  9 +++++++++
 tcp.c      | 39 +++++++++++++++++++++++++++++++++++++++
 tcp_conn.h |  1 +
 5 files changed, 87 insertions(+)

diff --git a/flow.c b/flow.c
index dbe57082..4807653a 100644
--- a/flow.c
+++ b/flow.c
@@ -956,6 +956,42 @@ static int flow_migrate_repair_all(struct ctx *c, bool enable)
 	return 0;
 }
 
+/**
+ * flow_migrate_source_early() - Early tasks: shrink (RFC 7323 2.2) TCP windows
+ * @c:		Execution context
+ * @stage:	Migration stage information, unused
+ * @fd:		Migration file descriptor, unused
+ *
+ * Return: 0 on success, positive error code on failure
+ */
+int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
+			      int fd)
+{
+	union flow *flow;
+	unsigned i;
+	int rc;
+
+	(void)stage;
+	(void)fd;
+
+	/* We need repair mode to dump and set (some) window parameters */
+	if ((rc = flow_migrate_repair_all(c, true)))
+		return -rc;
+
+	foreach_tcp_flow(i, flow, FLOW_MAX) {
+		if ((rc = tcp_flow_migrate_shrink_window(i, &flow->tcp))) {
+			err("Shrinking window, flow %u: %s", i, strerror_(-rc));
+			return flow_migrate_source_rollback(c, i, -rc);
+		}
+	}
+
+	/* Now send window updates. We'll flip repair mode back on in a bit */
+	if ((rc = flow_migrate_repair_all(c, false)))
+		return -rc;
+
+	return 0;
+}
+
 /**
  * flow_migrate_source_pre() - Prepare flows for migration: enable repair mode
  * @c:		Execution context
diff --git a/flow.h b/flow.h
index a485c359..675726eb 100644
--- a/flow.h
+++ b/flow.h
@@ -249,6 +249,8 @@ union flow;
 void flow_init(void);
 void flow_defer_handler(const struct ctx *c, const struct timespec *now);
 
+int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
+			      int fd);
 int flow_migrate_source_pre(struct ctx *c, const struct migrate_stage *stage,
 			    int fd);
 int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
diff --git a/migrate.c b/migrate.c
index d035deda..b4e53b45 100644
--- a/migrate.c
+++ b/migrate.c
@@ -98,6 +98,15 @@ static int seen_addrs_target_v1(struct ctx *c,
 
 /* Stages for version 1 */
 static const struct migrate_stage stages_v1[] = {
+	/* FIXME: With this step, close() in tcp_flow_migrate_source_ext()
+	 * *sometimes* closes the connection for real.
+	 */
+/*	{
+		.name = "shrink TCP windows",
+		.source = flow_migrate_source_early,
+		.target = NULL,
+	},
+*/
 	{
 		.name = "observed addresses",
 		.source = seen_addrs_source_v1,
diff --git a/tcp.c b/tcp.c
index c3db1696..94bb059a 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3061,6 +3061,45 @@ static int tcp_flow_repair_wnd(int s, uint32_t *snd_wl1, uint32_t *snd_wnd,
 	return 0;
 }
 
+/**
+ * tcp_flow_migrate_shrink_window() - Dump window data, decrease socket window
+ * @fidx:	Flow index
+ * @conn:	Pointer to the TCP connection structure
+ *
+ * Failures to shrink the receive buffer or to set the window are only logged.
+ * A failure to dump window data is returned, and TCP_REPAIR_WINDOW isn't set.
+ *
+ * Return: 0 on success, error code from tcp_flow_repair_wnd() on dump failure
+ *	   (presumably negative, as the caller passes -rc to strerror_())
+ */
+int tcp_flow_migrate_shrink_window(int fidx, const struct tcp_tap_conn *conn)
+{
+	struct tcp_tap_transfer_ext *t = &migrate_ext[fidx];
+	struct tcp_repair_window wnd;
+	socklen_t sl = sizeof(wnd);
+	int s = conn->sock;
+	int rc;
+
+	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &((int){ 0 }), sizeof(int)))
+		debug("TCP: failed to set SO_RCVBUF to minimum value");
+
+	/* Dump window data as it is for the target, before touching stuff.
+	 * On failure, wnd might not be filled: don't set it on the socket.
+	 */
+	if ((rc = tcp_flow_repair_wnd(s, &t->sock_snd_wl1, &t->sock_snd_wnd,
+				      &t->sock_max_window, &t->sock_rcv_wnd,
+				      &t->sock_rcv_wup, &wnd)))
+		return rc;
+
+	/* Shrink the receive window to zero */
+	wnd.rcv_wnd = 0;
+
+	if (setsockopt(s, IPPROTO_TCP, TCP_REPAIR_WINDOW, &wnd, sl))
+		debug_perror("Setting window repair data, socket %i", s);
+
+	return 0;
+}
+
 /**
  * tcp_flow_migrate_source() - Send data (flow table part) for a single flow
  * @c:		Execution context
diff --git a/tcp_conn.h b/tcp_conn.h
index 1c920a6e..b64e857a 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -227,6 +227,7 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn);
 
 int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn);
 int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn);
+int tcp_flow_migrate_shrink_window(int fidx, const struct tcp_tap_conn *conn);
 int tcp_flow_migrate_source(int fd, const struct tcp_tap_conn *conn);
 int tcp_flow_migrate_source_ext(int fd, int fidx,
 				const struct tcp_tap_conn *conn);
-- 
2.48.1