From 724b305dcb57ccbaa13b476d08a03c32ccd1b4ed Mon Sep 17 00:00:00 2001 From: jwijenbergh Date: Thu, 12 Jan 2023 18:41:37 +0100 Subject: Failover: Return early by waiting for a single pong --- internal/failover/failover.go | 7 ++----- internal/failover/monitor.go | 43 ++++++++++++++++++++++--------------------- internal/failover/ping.go | 26 ++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 26 deletions(-) (limited to 'internal') diff --git a/internal/failover/failover.go b/internal/failover/failover.go index 31fb2b0..1c4c32e 100644 --- a/internal/failover/failover.go +++ b/internal/failover/failover.go @@ -10,9 +10,6 @@ const ( // Send a ping every 2 seconds to the gateway pInterval time.Duration = 2 * time.Second - // pAlive is how many pings we need to have sent to check if the connection is alive - pAlive int = 3 - // pDropped is how many pings we need to have sent to check if the connection is dropped pDropped int = 5 ) @@ -20,6 +17,6 @@ const ( // New creates a failover monitor for the gateway and the rx bytes function reader // This is a simple wrapper over `NewDroppedMonitor` to create one with the default settings // If this function returns True, the connection is dropped. False means it has exited and we don't know for sure if it's dropped or not -func New(readRxBytes func() (int64, error), logger log.FileLogger) (*DroppedConMon, error) { - return NewDroppedMonitor(pInterval, pAlive, pDropped, readRxBytes, logger) +func New(readRxBytes func() (int64, error), logger log.FileLogger) *DroppedConMon { + return NewDroppedMonitor(pInterval, pDropped, readRxBytes, logger) } diff --git a/internal/failover/monitor.go b/internal/failover/monitor.go index a6916be..c437d30 100644 --- a/internal/failover/monitor.go +++ b/internal/failover/monitor.go @@ -12,8 +12,6 @@ import ( type DroppedConMon struct { // pInterval means how the interval in which to send pings pInterval time.Duration - // pAlive means how many pings need to be send before checking if the connection is alive - pAlive int // pDropped means how many pings need to be send before checking if the connection is dropped pDropped int // The function that reads Rx bytes @@ -27,11 +25,8 @@ type DroppedConMon struct { logger log.FileLogger } -func NewDroppedMonitor(pingInterval time.Duration, pAlive int, pDropped int, readRxBytes func() (int64, error), logger log.FileLogger) (*DroppedConMon, error) { - if pAlive >= pDropped { - return nil, errors.New("pAlive must be smaller than pDropped") - } - return &DroppedConMon{pInterval: pingInterval, pAlive: pAlive, pDropped: pDropped, readRxBytes: readRxBytes, logger: logger}, nil +func NewDroppedMonitor(pingInterval time.Duration, pDropped int, readRxBytes func() (int64, error), logger log.FileLogger) *DroppedConMon { + return &DroppedConMon{pInterval: pingInterval, pDropped: pDropped, readRxBytes: readRxBytes, logger: logger} } // Dropped checks whether or not the connection is 'dropped' @@ -70,32 +65,38 @@ func (m *DroppedConMon) Start(gateway string, mtuSize int) (bool, error) { return false, err } + // Send a ping and wait for max 2 seconds + // If we have then increased Rx bytes we return early + if err = p.Send(gateway, 1); err != nil { + m.logger.Debugf("[Failover] First ping failed, exiting...") + return false, err + } + m.logger.Debugf("[Failover] Now we are doing alive check") + + // Read the pong, if we got the echo reply then everything is fine, early return + if err = p.Read(time.Now().Add(m.pInterval)); err == nil { + m.logger.Debugf("[Failover] Got early pong, exiting...") + return false, err + } + m.logger.Debugf("[Failover] Error reading pong: %v", err) + // Create a new ticker that executes our ping function every 'interval' seconds // It starts immediately and stops when we reach the end ticker := time.NewTicker(m.pInterval) defer ticker.Stop() - m.logger.Debugf("[Failover] Starting...") + // Otherwise send n pings, without waiting for pong and then check if dropped + m.logger.Debugf("[Failover] Starting by sending pings and not waiting for pong...") // Loop until the max drop counter - // We begin with 1 as this is used as the sequence number for ping - for s := 1; s <= m.pDropped; s++ { + // We begin with 2 as this is used as the sequence number for ping + // and we have already sent a ping + for s := 2; s <= m.pDropped; s++ { m.logger.Debugf("[Failover] Sending ping: %d, with size: %d", s, mtuSize) // Send a ping and return if an error occurs if err := p.Send(gateway, s); err != nil { m.logger.Debugf("[Failover] A ping failed, exiting...") return false, err } - - // Early alive check - // If not dropped, return - if s == m.pAlive { - m.logger.Debugf("[Failover] Doing check if we are alive") - if d, err := m.dropped(b); !d { - m.logger.Debugf("[Failover] We are alive") - return false, err - } - m.logger.Debugf("[Failover] Not alive currently, ticking further...") - } // Wait for the next tick to continue select { case <-ticker.C: diff --git a/internal/failover/ping.go b/internal/failover/ping.go index 2ffedd4..15d4adb 100644 --- a/internal/failover/ping.go +++ b/internal/failover/ping.go @@ -4,6 +4,7 @@ import ( "fmt" "net" "os" + "time" "golang.org/x/net/icmp" "golang.org/x/net/ipv4" @@ -27,6 +28,30 @@ func NewPinger(size int) (*Pinger, error) { return &Pinger{listener: l, buffer: make([]byte, size-mtuOverhead)}, nil } +func (p Pinger) Read(deadline time.Time) error { + // First set the deadline to read + err := p.listener.SetReadDeadline(deadline) + if err != nil { + return err + } + + r := make([]byte, 1500) + n, _, err := p.listener.ReadFrom(r) + if err != nil { + return err + } + got, err := icmp.ParseMessage(ipv4.ICMPTypeEchoReply.Protocol(), r[:n]) + if err != nil { + return err + } + switch got.Type { + case ipv4.ICMPTypeEchoReply: + return nil + default: + return errors.Errorf("Not a ping echo reply, got %+v", got) + } +} + func (p Pinger) Send(gateway string, seq int) error { errorMessage := fmt.Sprintf("failed sending ping, seq %d", seq) // Make a new ICMP message @@ -47,5 +72,6 @@ func (p Pinger) Send(gateway string, seq int) error { if err != nil { return errors.WrapPrefix(err, errorMessage, 0) } + return nil } -- cgit v1.2.3