hs-test: pin CPUs to containers

Type: test

Change-Id: I412be2dec7ff352740e50e838e0ac466bf0a6674
Signed-off-by: Adrian Villin <avillin@cisco.com>
This commit is contained in:
Adrian Villin
2024-05-27 09:52:59 -04:00
committed by Dave Wallace
parent 2b671aa3e9
commit b9464cde7d
8 changed files with 47 additions and 39 deletions

View File

@@ -68,7 +68,7 @@ help:
@echo " UNCONFIGURE=[true|false] - unconfigure selected test"
@echo " DEBUG=[true|false] - attach VPP to GDB"
@echo " TEST=[test-name] - specific test to run"
@echo " CPUS=[n-cpus] - number of cpus to run with vpp"
@echo " CPUS=[n-cpus] - number of cpus to allocate to VPP and containers"
@echo " VPPSRC=[path-to-vpp-src] - path to vpp source files (for gdb)"
@echo " PARALLEL=[n-cpus] - number of test processes to spawn to run in parallel"
@echo " REPEAT=[n] - repeat tests up to N times or until a failure occurs"

View File

@@ -37,6 +37,7 @@ type Container struct {
volumes map[string]Volume
envVars map[string]string
vppInstance *VppInstance
allocatedCpus []int
}
func newContainer(suite *HstSuite, yamlInput ContainerConfig) (*Container, error) {
@@ -160,6 +161,12 @@ func (c *Container) create() error {
return exechelper.Run(cmd)
}
// allocateCpus reserves a dedicated CPU range for this container: it bumps the
// suite-wide container counter, asks the suite for the next slice of CPUs, and
// records the result in c.allocatedCpus (later passed to docker via
// --cpuset-cpus in prepareCommand).
func (c *Container) allocateCpus() {
// Increment before allocating: AllocateCpus() -> cpuAllocator.Allocate() uses
// the (1-based) containerCount to pick this container's CPU slot.
c.suite.containerCount += 1
c.allocatedCpus = c.suite.AllocateCpus()
c.suite.log("Allocated CPUs " + fmt.Sprint(c.allocatedCpus) + " to container " + c.name)
}
func (c *Container) start() error {
cmd := "docker start " + c.name
c.suite.log(cmd)
@@ -175,6 +182,9 @@ func (c *Container) prepareCommand() (string, error) {
if c.runDetached {
cmd += " -d"
}
c.allocateCpus()
cmd += fmt.Sprintf(" --cpuset-cpus=\"%d-%d\"", c.allocatedCpus[0], c.allocatedCpus[len(c.allocatedCpus)-1])
cmd += " " + c.getContainerArguments()
c.suite.log(cmd)
@@ -239,7 +249,6 @@ func (c *Container) newVppInstance(cpus []int, additionalConfigs ...Stanza) (*VppInstance, error) {
vpp := new(VppInstance)
vpp.container = c
vpp.cpus = cpus
c.suite.vppContainerCount += 1
vpp.additionalConfig = append(vpp.additionalConfig, additionalConfigs...)
c.vppInstance = vpp
return vpp, nil
@@ -287,7 +296,7 @@ func (c *Container) exec(command string, arguments ...any) string {
GinkgoHelper()
c.suite.log(containerExecCommand)
byteOutput, err := exechelper.CombinedOutput(containerExecCommand)
c.suite.assertNil(err, err)
c.suite.assertNil(err, fmt.Sprint(err))
return string(byteOutput)
}

View File

@@ -23,22 +23,26 @@ type CpuAllocatorT struct {
var cpuAllocator *CpuAllocatorT = nil
func (c *CpuAllocatorT) Allocate(vppContainerCount int, nCpus int) (*CpuContext, error) {
func (c *CpuAllocatorT) Allocate(containerCount int, nCpus int) (*CpuContext, error) {
var cpuCtx CpuContext
maxCpu := GinkgoParallelProcess() * 2 * nCpus
minCpu := (GinkgoParallelProcess() - 1) * 2 * nCpus
if len(c.cpus) < maxCpu {
vppContainerCount += 1
// splitting cpus into equal parts; this will over-allocate cores but it's good enough for now
maxContainerCount := 4
// skip CPU 0
minCpu := ((GinkgoParallelProcess() - 1) * maxContainerCount * nCpus) + 1
maxCpu := (GinkgoParallelProcess() * maxContainerCount * nCpus)
if len(c.cpus)-1 < maxCpu {
err := fmt.Errorf("could not allocate %d CPUs; available: %d; attempted to allocate cores %d-%d",
nCpus*vppContainerCount, len(c.cpus), minCpu, minCpu+nCpus*vppContainerCount)
nCpus*containerCount, len(c.cpus)-1, minCpu, maxCpu)
return nil, err
}
if vppContainerCount == 0 {
cpuCtx.cpus = c.cpus[minCpu : maxCpu-nCpus]
} else if vppContainerCount == 1 {
cpuCtx.cpus = c.cpus[minCpu+nCpus : maxCpu]
if containerCount == 1 {
cpuCtx.cpus = c.cpus[minCpu : minCpu+nCpus]
} else if containerCount > 1 && containerCount <= maxContainerCount {
cpuCtx.cpus = c.cpus[minCpu+(nCpus*(containerCount-1)) : minCpu+(nCpus*containerCount)]
} else {
return nil, fmt.Errorf("too many VPP containers; CPU allocation for >2 VPP containers is not implemented yet")
return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", maxContainerCount)
}
cpuCtx.cpuAllocator = c

View File

@@ -31,7 +31,7 @@ var vppSourceFileDir = flag.String("vppsrc", "", "vpp source file directory")
type HstSuite struct {
containers map[string]*Container
vppContainerCount int
containerCount int
volumes []string
netConfigs []NetConfig
netInterfaces map[string]*NetInterface
@@ -62,7 +62,7 @@ func (s *HstSuite) SetupSuite() {
}
func (s *HstSuite) AllocateCpus() []int {
cpuCtx, err := s.cpuAllocator.Allocate(s.vppContainerCount, s.cpuPerVpp)
cpuCtx, err := s.cpuAllocator.Allocate(s.containerCount, s.cpuPerVpp)
s.assertNil(err)
s.AddCpuContext(cpuCtx)
return cpuCtx.cpus
@@ -96,7 +96,7 @@ func (s *HstSuite) skipIfUnconfiguring() {
func (s *HstSuite) SetupTest() {
s.log("Test Setup")
s.vppContainerCount = 0
s.containerCount = 0
s.skipIfUnconfiguring()
s.setupVolumes()
s.setupContainers()

View File

@@ -48,10 +48,9 @@ func (s *NginxSuite) SetupTest() {
append("enable").
append("use-app-socket-api").close()
cpus := s.AllocateCpus()
// ... for proxy
vppProxyContainer := s.getContainerByName(vppProxyContainerName)
proxyVpp, _ := vppProxyContainer.newVppInstance(cpus, sessionConfig)
proxyVpp, _ := vppProxyContainer.newVppInstance(vppProxyContainer.allocatedCpus, sessionConfig)
s.assertNil(proxyVpp.start())
clientInterface := s.getInterfaceByName(mirroringClientInterfaceName)

View File

@@ -45,9 +45,8 @@ func (s *NoTopoSuite) SetupTest() {
append("enable").
append("use-app-socket-api").close()
cpus := s.AllocateCpus()
container := s.getContainerByName(singleTopoContainerVpp)
vpp, _ := container.newVppInstance(cpus, sessionConfig)
vpp, _ := container.newVppInstance(container.allocatedCpus, sessionConfig)
s.assertNil(vpp.start())
tapInterface := s.getInterfaceByName(tapInterfaceName)

View File

@@ -48,9 +48,8 @@ func (s *NsSuite) SetupTest() {
append("evt_qs_memfd_seg").
append("event-queue-length 100000").close()
cpus := s.AllocateCpus()
container := s.getContainerByName("vpp")
vpp, _ := container.newVppInstance(cpus, sessionConfig)
vpp, _ := container.newVppInstance(container.allocatedCpus, sessionConfig)
s.assertNil(vpp.start())
idx, err := vpp.createAfPacket(s.getInterfaceByName(serverInterface))

View File

@@ -50,8 +50,7 @@ func (s *VethsSuite) SetupTest() {
// ... For server
serverContainer := s.getContainerByName("server-vpp")
cpus := s.AllocateCpus()
serverVpp, err := serverContainer.newVppInstance(cpus, sessionConfig)
serverVpp, err := serverContainer.newVppInstance(serverContainer.allocatedCpus, sessionConfig)
s.assertNotNil(serverVpp, fmt.Sprint(err))
s.setupServerVpp()
@@ -59,8 +58,7 @@ func (s *VethsSuite) SetupTest() {
// ... For client
clientContainer := s.getContainerByName("client-vpp")
cpus = s.AllocateCpus()
clientVpp, err := clientContainer.newVppInstance(cpus, sessionConfig)
clientVpp, err := clientContainer.newVppInstance(clientContainer.allocatedCpus, sessionConfig)
s.assertNotNil(clientVpp, fmt.Sprint(err))
s.setupClientVpp()