[VOL-3482] Do not restart DHCP/EAPOL exchanges that are already in progress

Change-Id: Iebf3150e2046e4d124101e70fe237bbfc2daa3ff
diff --git a/internal/bbsim/devices/olt.go b/internal/bbsim/devices/olt.go
index c883e95..9610961 100644
--- a/internal/bbsim/devices/olt.go
+++ b/internal/bbsim/devices/olt.go
@@ -322,6 +322,7 @@
 	// terminate the OLT's processNniPacketIns go routine
 	go o.nniHandle.Close()
 	close(o.nniPktInChannel)
+	o.enableContextCancel()
 
 	time.Sleep(time.Duration(rebootDelay) * time.Second)
 
diff --git a/internal/bbsim/devices/service_test.go b/internal/bbsim/devices/service_test.go
index c585906..0c4ab2b 100644
--- a/internal/bbsim/devices/service_test.go
+++ b/internal/bbsim/devices/service_test.go
@@ -17,6 +17,7 @@
 package devices
 
 import (
+	"context"
 	"github.com/opencord/bbsim/internal/bbsim/types"
 	"github.com/opencord/voltha-protos/v3/go/openolt"
 	"github.com/stretchr/testify/assert"
@@ -47,13 +48,23 @@
 func (s *mockService) Initialize(stream types.Stream) {}
 func (s *mockService) Disable()                       {}
 
-// test the internalState transitions
-func TestService_InternalState(t *testing.T) {
+func createTestService(needsEapol bool, needsDchp bool) (*Service, error) {
+
+	enableContext := context.TODO()
+
 	mac := net.HardwareAddr{0x2e, 0x60, byte(1), byte(1), byte(1), byte(1)}
 	onu := createMockOnu(1, 1)
-	s, err := NewService("testService", mac, onu, 900, 900,
-		false, false, false, 64, 0, false,
+	onu.PonPort = &PonPort{}
+	onu.PonPort.Olt = &OltDevice{}
+	onu.PonPort.Olt.enableContext = enableContext
+	return NewService("testService", mac, onu, 900, 900,
+		needsEapol, needsDchp, false, 64, 0, false,
 		0, 0, 0, 0)
+}
+
+// test the internalState transitions
+func TestService_InternalState(t *testing.T) {
+	s, err := createTestService(false, false)
 
 	assert.Nil(t, err)
 
@@ -80,11 +91,7 @@
 
 // make sure that if the service does not need EAPOL we're not sending any packet
 func TestService_HandleAuth_noEapol(t *testing.T) {
-	mac := net.HardwareAddr{0x2e, 0x60, byte(1), byte(1), byte(1), byte(1)}
-	onu := createMockOnu(1, 1)
-	s, err := NewService("testService", mac, onu, 900, 900,
-		false, false, false, 64, 0, false,
-		0, 0, 0, 0)
+	s, err := createTestService(false, false)
 
 	assert.Nil(t, err)
 
@@ -106,11 +113,7 @@
 
 // make sure that if the service does need EAPOL we're sending any packet
 func TestService_HandleAuth_withEapol(t *testing.T) {
-	mac := net.HardwareAddr{0x2e, 0x60, byte(1), byte(1), byte(1), byte(1)}
-	onu := createMockOnu(1, 1)
-	s, err := NewService("testService", mac, onu, 900, 900,
-		true, false, false, 64, 0, false,
-		0, 0, 0, 0)
+	s, err := createTestService(true, false)
 
 	assert.Nil(t, err)
 
@@ -131,11 +134,7 @@
 
 // make sure that if the service does not need DHCP we're not sending any packet
 func TestService_HandleDhcp_not_needed(t *testing.T) {
-	mac := net.HardwareAddr{0x2e, 0x60, byte(1), byte(1), byte(1), byte(1)}
-	onu := createMockOnu(1, 1)
-	s, err := NewService("testService", mac, onu, 900, 900,
-		false, false, false, 64, 0, false,
-		0, 0, 0, 0)
+	s, err := createTestService(false, false)
 
 	assert.Nil(t, err)
 
@@ -156,11 +155,7 @@
 // when we receive a DHCP flow we call HandleDhcp an all the ONU Services
 // each service device whether the tag matches it's own configuration
 func TestService_HandleDhcp_different_c_Tag(t *testing.T) {
-	mac := net.HardwareAddr{0x2e, 0x60, byte(1), byte(1), byte(1), byte(1)}
-	onu := createMockOnu(1, 1)
-	s, err := NewService("testService", mac, onu, 900, 900,
-		false, false, false, 64, 0, false,
-		0, 0, 0, 0)
+	s, err := createTestService(false, true)
 
 	assert.Nil(t, err)
 
@@ -181,11 +176,7 @@
 
 // make sure that if the service does need DHCP we're sending any packet
 func TestService_HandleDhcp_needed(t *testing.T) {
-	mac := net.HardwareAddr{0x2e, 0x60, byte(1), byte(1), byte(1), byte(1)}
-	onu := createMockOnu(1, 1)
-	s, err := NewService("testService", mac, onu, 900, 900,
-		false, true, false, 64, 0, false,
-		0, 0, 0, 0)
+	s, err := createTestService(false, true)
 
 	assert.Nil(t, err)
 
@@ -200,3 +191,58 @@
 	assert.Equal(t, 1, stream.CallCount)
 	assert.Equal(t, "dhcp_discovery_sent", s.DHCPState.Current())
 }
+
+// Test that if the EAPOL state machine doesn't complete in 30 seconds we
+// move it to EAPOL failed
+func TestService_EAPOLFailed(t *testing.T) {
+	// override the default wait time
+	eapolWaitTime = 500 * time.Millisecond
+	s, err := createTestService(true, false)
+
+	assert.Nil(t, err)
+
+	stream := &mockStream{
+		Calls: make(map[int]*openolt.Indication),
+	}
+	s.Initialize(stream)
+
+	// set to failed if timeout occurs
+	_ = s.EapolState.Event("start_auth")
+	time.Sleep(1 * time.Second)
+	assert.Equal(t, "auth_failed", s.EapolState.Current())
+
+	// do not set to failed if succeeded
+	s.EapolState.SetState("created")
+	_ = s.EapolState.Event("start_auth")
+	s.EapolState.SetState("eap_response_success_received")
+	time.Sleep(1 * time.Second)
+	assert.Equal(t, "eap_response_success_received", s.EapolState.Current())
+
+}
+
+// Test that if the DHCP state machine doesn't complete in 30 seconds we
+// move it to DHCP failed
+func TestService_DHCPFailed(t *testing.T) {
+	// override the default wait time
+	dhcpWaitTime = 100 * time.Millisecond
+	s, err := createTestService(false, true)
+
+	assert.Nil(t, err)
+
+	stream := &mockStream{
+		Calls: make(map[int]*openolt.Indication),
+	}
+	s.Initialize(stream)
+
+	// set to failed if timeout occurs
+	_ = s.DHCPState.Event("start_dhcp")
+	time.Sleep(1 * time.Second)
+	assert.Equal(t, "dhcp_failed", s.DHCPState.Current())
+
+	// do not set to failed if succeeded
+	s.DHCPState.SetState("created")
+	_ = s.DHCPState.Event("start_dhcp")
+	s.DHCPState.SetState("dhcp_ack_received")
+	time.Sleep(1 * time.Second)
+	assert.Equal(t, "dhcp_ack_received", s.DHCPState.Current())
+}
diff --git a/internal/bbsim/devices/services.go b/internal/bbsim/devices/services.go
index 1d45ef7..8aa3410 100644
--- a/internal/bbsim/devices/services.go
+++ b/internal/bbsim/devices/services.go
@@ -26,12 +26,18 @@
 	bbsimTypes "github.com/opencord/bbsim/internal/bbsim/types"
 	log "github.com/sirupsen/logrus"
 	"net"
+	"time"
 )
 
 var serviceLogger = log.WithFields(log.Fields{
 	"module": "SERVICE",
 })
 
+// time to wait before fail EAPOL/DHCP
+// (it's a variable and not a constant so it can be overridden in the tests)
+var eapolWaitTime = 30 * time.Second
+var dhcpWaitTime = 30 * time.Second
+
 type ServiceIf interface {
 	HandlePackets()      // start listening on the PacketCh
 	HandleAuth()         // Sends the EapoStart packet
@@ -134,7 +140,7 @@
 	service.EapolState = fsm.NewFSM(
 		"created",
 		fsm.Events{
-			{Name: "start_auth", Src: []string{"created", "eap_start_sent", "eap_response_identity_sent", "eap_response_challenge_sent", "eap_response_success_received", "auth_failed"}, Dst: "auth_started"},
+			{Name: "start_auth", Src: []string{"created", "eap_response_success_received", "auth_failed"}, Dst: "auth_started"},
 			{Name: "eap_start_sent", Src: []string{"auth_started"}, Dst: "eap_start_sent"},
 			{Name: "eap_response_identity_sent", Src: []string{"eap_start_sent"}, Dst: "eap_response_identity_sent"},
 			{Name: "eap_response_challenge_sent", Src: []string{"eap_response_identity_sent"}, Dst: "eap_response_challenge_sent"},
@@ -151,6 +157,31 @@
 				}
 				service.Channel <- msg
 			},
+			"enter_auth_started": func(e *fsm.Event) {
+				go func() {
+
+				loop:
+					for {
+						select {
+						case <-service.Onu.PonPort.Olt.enableContext.Done():
+							// if the OLT is disabled, then cancel
+							break loop
+						case <-time.After(eapolWaitTime):
+							if service.EapolState.Current() != "eap_response_success_received" {
+								serviceLogger.WithFields(log.Fields{
+									"OnuId":      service.Onu.ID,
+									"IntfId":     service.Onu.PonPortID,
+									"OnuSn":      service.Onu.Sn(),
+									"Name":       service.Name,
+									"EapolState": service.EapolState.Current(),
+								}).Warn("EAPOL failed, resetting EAPOL State")
+								_ = service.EapolState.Event("auth_failed")
+							}
+						}
+
+					}
+				}()
+			},
 		},
 	)
 
@@ -159,7 +190,7 @@
 		fsm.Events{
 			// TODO only allow transitions to dhcp_start from success or failure, not in-between states
 			// TODO forcefully fail DHCP if we don't get an ack in X seconds
-			{Name: "start_dhcp", Src: []string{"created", "dhcp_discovery_sent", "dhcp_request_sent", "dhcp_ack_received", "dhcp_failed"}, Dst: "dhcp_started"},
+			{Name: "start_dhcp", Src: []string{"created", "dhcp_ack_received", "dhcp_failed"}, Dst: "dhcp_started"},
 			{Name: "dhcp_discovery_sent", Src: []string{"dhcp_started"}, Dst: "dhcp_discovery_sent"},
 			{Name: "dhcp_request_sent", Src: []string{"dhcp_discovery_sent"}, Dst: "dhcp_request_sent"},
 			{Name: "dhcp_ack_received", Src: []string{"dhcp_request_sent"}, Dst: "dhcp_ack_received"},
@@ -175,6 +206,31 @@
 				}
 				service.Channel <- msg
 			},
+			"enter_dhcp_started": func(e *fsm.Event) {
+				go func() {
+
+				loop:
+					for {
+						select {
+						case <-service.Onu.PonPort.Olt.enableContext.Done():
+							// if the OLT is disabled, then cancel
+							break loop
+						case <-time.After(dhcpWaitTime):
+							if service.DHCPState.Current() != "dhcp_ack_received" {
+								serviceLogger.WithFields(log.Fields{
+									"OnuId":     service.Onu.ID,
+									"IntfId":    service.Onu.PonPortID,
+									"OnuSn":     service.Onu.Sn(),
+									"Name":      service.Name,
+									"DHCPState": service.DHCPState.Current(),
+								}).Warn("DHCP failed, resetting DHCP State")
+								_ = service.DHCPState.Event("dhcp_failed")
+							}
+						}
+
+					}
+				}()
+			},
 		},
 	)