diff --git a/Makefile b/Makefile index f578902..7a26eb1 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,22 @@ build-dracpu: ## build dracpu clean: ## clean rm -rf "$(OUT_DIR)/" +with-kind: ## run a command with a temporary kind cluster (create if missing, delete on exit if created) + @if [ -z "$$CMD" ]; then \ + echo "CMD is required. Example: CMD='echo hello' $(MAKE) with-kind"; \ + exit 1; \ + fi; \ + created=0; \ + if kind get clusters 2>/dev/null | grep -qx "$(CLUSTER_NAME)"; then \ + created=2; \ + fi; \ + if [ "$$created" -eq 0 ]; then \ + kind create cluster --name ${CLUSTER_NAME} --config hack/kind.yaml || exit 1; \ + created=1; \ + fi; \ + trap 'if [ "$$created" -eq 1 ]; then kind delete cluster --name ${CLUSTER_NAME}; fi' EXIT; \ + bash -c "$$CMD" + test-unit: ## run tests CGO_ENABLED=1 go test -v -race -count 1 -coverprofile=coverage.out ./pkg/... @@ -96,7 +112,16 @@ export DOCKER_CLI_EXPERIMENTAL=enabled image: ## docker build load docker build . -t ${STAGING_IMAGE_NAME} --load -build-image: ## build image +build-image: ## build image (skip if already exists unless FORCE_BUILD=1) + @if [ "$(FORCE_BUILD)" = "1" ]; then \ + $(MAKE) build-image-force; \ + elif docker image inspect "${IMAGE}" >/dev/null 2>&1; then \ + echo "Image ${IMAGE} already exists; skipping build."; \ + else \ + $(MAKE) build-image-force; \ + fi + +build-image-force: ## force build image docker buildx build . 
\ --platform="${PLATFORMS}" \ --tag="${IMAGE}" \ @@ -116,6 +141,9 @@ kind-cluster: ## create kind cluster kind-load-image: build-image ## load the current container image into kind kind load docker-image ${IMAGE} ${IMAGE_LATEST} --name ${CLUSTER_NAME} +kind-load-test-image: build-test-image ## load the test image into kind + kind load docker-image ${IMAGE_TEST} --name ${CLUSTER_NAME} + kind-uninstall-cpu-dra: ## remove cpu dra from kind cluster kubectl delete -f install.yaml || true @@ -162,7 +190,16 @@ ifneq ($(DRACPU_E2E_VERBOSE),) endif $(call kind_setup,$(CI_MANIFEST_FILE)) -build-test-image: ## build tests image +build-test-image: ## build tests image (skip if already exists unless FORCE_BUILD=1) + @if [ "$(FORCE_BUILD)" = "1" ]; then \ + $(MAKE) build-test-image-force; \ + elif docker image inspect "${IMAGE_TEST}" >/dev/null 2>&1; then \ + echo "Image ${IMAGE_TEST} already exists; skipping build."; \ + else \ + $(MAKE) build-test-image-force; \ + fi + +build-test-image-force: ## force build tests image docker buildx build . \ --file test/image/Dockerfile \ --platform="${PLATFORMS}" \ @@ -175,11 +212,36 @@ build-test-dracputester: ## build helper to serve as entry point and report cpu build-test-dracpuinfo: ## build helper to expose hardware info in the internal dracpu format go build -v -o "$(OUT_DIR)/dracpuinfo" ./test/image/dracpuinfo -test-e2e: ## run e2e test against an existing configured cluster - env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$(DRACPU_E2E_RESERVED_CPUS) go test -v ./test/e2e/ --ginkgo.v +# E2E: shared setup and run targets (assume cluster exists; use with-kind for "create if missing, delete if we created"). 
+define e2e_daemonset_patch +[{"op":"replace","path":"/spec/template/spec/containers/0/args","value":["/dracpu","--v=4","--cpu-device-mode=$(1)","--reserved-cpus=$(DRACPU_E2E_RESERVED_CPUS)"]}] +endef + +kind-e2e-setup: ## label workers, load test image, install DRA, wait for rollout (cluster must exist) + kubectl label nodes -l '!node-role.kubernetes.io/control-plane' node-role.kubernetes.io/worker='' --overwrite + $(MAKE) kind-load-test-image kind-install-cpu-dra + kubectl -n kube-system rollout status daemonset/dracpu --timeout=120s + +test-e2e-grouped-run: ## patch daemonset to grouped and run e2e (requires kind-e2e-setup) + kubectl -n kube-system patch daemonset dracpu --type=json -p='$(call e2e_daemonset_patch,grouped)' && kubectl -n kube-system rollout status daemonset/dracpu --timeout=120s + env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$(DRACPU_E2E_RESERVED_CPUS) DRACPU_E2E_CPU_DEVICE_MODE=grouped go test -v ./test/e2e/ --ginkgo.v + +test-e2e-individual-run: ## patch daemonset to individual and run e2e (requires kind-e2e-setup) + kubectl -n kube-system patch daemonset dracpu --type=json -p='$(call e2e_daemonset_patch,individual)' + kubectl -n kube-system rollout status daemonset/dracpu --timeout=120s + env DRACPU_E2E_TEST_IMAGE=$(IMAGE_TEST) DRACPU_E2E_RESERVED_CPUS=$(DRACPU_E2E_RESERVED_CPUS) DRACPU_E2E_CPU_DEVICE_MODE=individual go test -v ./test/e2e/ --ginkgo.v + +test-e2e: ## run e2e in both grouped and individual mode (one cluster: create if missing, run both, delete if we created) + CMD='$(MAKE) kind-e2e-setup || exit 1; $(MAKE) test-e2e-grouped-run; r1=$$?; $(MAKE) test-e2e-individual-run; r2=$$?; exit $$((r1|r2))' $(MAKE) with-kind test-e2e-kind: ci-kind-setup test-e2e ## run e2e test against a purpose-built kind cluster +test-e2e-grouped-mode: ## run e2e tests in grouped mode (with-kind) + CMD='$(MAKE) kind-e2e-setup test-e2e-grouped-run' $(MAKE) with-kind + +test-e2e-individual-mode: ## run e2e tests in individual mode (with-kind) + CMD='$(MAKE) kind-e2e-setup test-e2e-individual-run' $(MAKE) with-kind + 
lint: ## run the linter against the codebase $(GOLANGCI_LINT) run ./... diff --git a/test/e2e/cpu_assignment_test.go b/test/e2e/cpu_assignment_test.go index cfa3b42..8cfb27d 100644 --- a/test/e2e/cpu_assignment_test.go +++ b/test/e2e/cpu_assignment_test.go @@ -60,13 +60,14 @@ Please note "Serial" is however unavoidable because we manage the shared node st */ var _ = ginkgo.Describe("CPU Allocation", ginkgo.Serial, ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { var ( - rootFxt *fixture.Fixture - targetNode *v1.Node - targetNodeCPUInfo discovery.DRACPUInfo - availableCPUs cpuset.CPUSet - dracpuTesterImage string - reservedCPUs cpuset.CPUSet - cpuDeviceMode string + rootFxt *fixture.Fixture + targetNode *v1.Node + targetNodeCPUInfo discovery.DRACPUInfo + availableCPUs cpuset.CPUSet + availableCPUsByNode map[string]cpuset.CPUSet + dracpuTesterImage string + reservedCPUs cpuset.CPUSet + cpuDeviceMode string ) ginkgo.BeforeAll(func(ctx context.Context) { @@ -106,38 +107,50 @@ var _ = ginkgo.Describe("CPU Allocation", ginkgo.Serial, ginkgo.Ordered, ginkgo. 
gomega.Expect(dsReservedCPUs.Equals(reservedCPUs)).To(gomega.BeTrue(), "daemonset reserved cpus %v do not match test reserved cpus %v", dsReservedCPUs.String(), reservedCPUs.String()) rootFxt.Log.Info("daemonset --cpu-device-mode configuration", "mode", cpuDeviceMode) + var workerNodes []*v1.Node if targetNodeName := os.Getenv("DRACPU_E2E_TARGET_NODE"); len(targetNodeName) > 0 { targetNode, err = rootFxt.K8SClientset.CoreV1().Nodes().Get(ctx, targetNodeName, metav1.GetOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred(), "cannot get worker node %q: %v", targetNodeName, err) + workerNodes = []*v1.Node{targetNode} } else { gomega.Eventually(func() error { - workerNodes, err := node.FindWorkers(ctx, infraFxt.K8SClientset) - if err != nil { - return err + var findErr error + workerNodes, findErr = node.FindWorkers(ctx, infraFxt.K8SClientset) + if findErr != nil { + return findErr } if len(workerNodes) == 0 { return fmt.Errorf("no worker nodes detected") } - targetNode = workerNodes[0] // pick random one, this is the simplest random pick + targetNode = workerNodes[0] return nil }).WithTimeout(1*time.Minute).WithPolling(5*time.Second).Should(gomega.Succeed(), "failed to find any worker node") } rootFxt.Log.Info("using worker node", "nodeName", targetNode.Name) - infoPod := discovery.MakePod(infraFxt.Namespace.Name, dracpuTesterImage) - infoPod = e2epod.PinToNode(infoPod, targetNode.Name) - infoPod, err = e2epod.RunToCompletion(ctx, infraFxt.K8SClientset, infoPod) - gomega.Expect(err).ToNot(gomega.HaveOccurred(), "cannot create discovery pod: %v", err) - data, err := e2epod.GetLogs(infraFxt.K8SClientset, ctx, infoPod.Namespace, infoPod.Name, infoPod.Spec.Containers[0].Name) - gomega.Expect(err).ToNot(gomega.HaveOccurred(), "cannot get logs from discovery pod: %v", err) - gomega.Expect(json.Unmarshal([]byte(data), &targetNodeCPUInfo)).To(gomega.Succeed()) - - allocatableCPUs := makeCPUSetFromDiscoveredCPUInfo(targetNodeCPUInfo) - availableCPUs = 
allocatableCPUs.Difference(reservedCPUs) - if reservedCPUs.Size() > 0 { - gomega.Expect(availableCPUs.Intersection(reservedCPUs).Size()).To(gomega.BeZero(), "available cpus %v overlap with reserved cpus %v", availableCPUs.String(), reservedCPUs.String()) + availableCPUsByNode = make(map[string]cpuset.CPUSet) + for _, n := range workerNodes { + infoPod := discovery.MakePod(infraFxt.Namespace.Name, dracpuTesterImage) + infoPod.Name = "discovery-pod-" + n.Name + infoPod = e2epod.PinToNode(infoPod, n.Name) + infoPod, err = e2epod.RunToCompletion(ctx, infraFxt.K8SClientset, infoPod) + gomega.Expect(err).ToNot(gomega.HaveOccurred(), "cannot create discovery pod on node %q: %v", n.Name, err) + data, err := e2epod.GetLogs(infraFxt.K8SClientset, ctx, infoPod.Namespace, infoPod.Name, infoPod.Spec.Containers[0].Name) + gomega.Expect(err).ToNot(gomega.HaveOccurred(), "cannot get logs from discovery pod: %v", err) + var nodeCPUInfo discovery.DRACPUInfo + gomega.Expect(json.Unmarshal([]byte(data), &nodeCPUInfo)).To(gomega.Succeed()) + if n.Name == targetNode.Name { + targetNodeCPUInfo = nodeCPUInfo + } + allocatable := makeCPUSetFromDiscoveredCPUInfo(nodeCPUInfo) + available := allocatable.Difference(reservedCPUs) + if reservedCPUs.Size() > 0 { + gomega.Expect(available.Intersection(reservedCPUs).Size()).To(gomega.BeZero(), "node %q: available cpus %v overlap reserved %v", n.Name, available.String(), reservedCPUs.String()) + } + availableCPUsByNode[n.Name] = available + rootFxt.Log.Info("checking worker node", "nodeName", n.Name, "coreCount", len(nodeCPUInfo.CPUs), "allocatableCPUs", allocatable.String(), "availableCPUs", available.String()) } - rootFxt.Log.Info("checking worker node", "nodeName", infoPod.Spec.NodeName, "coreCount", len(targetNodeCPUInfo.CPUs), "allocatableCPUs", allocatableCPUs.String(), "reservedCPUs", reservedCPUs.String(), "availableCPUs", availableCPUs.String()) + availableCPUs = availableCPUsByNode[targetNode.Name] }) ginkgo.When("setting resource claims", 
func() { @@ -184,7 +197,8 @@ var _ = ginkgo.Describe("CPU Allocation", ginkgo.Serial, ginkgo.Ordered, ginkgo. fxt.Log.Info("Creating pods requesting exclusive CPUs", "numPods", numPods, "cpusPerClaim", cpusPerClaim) var exclPods []*v1.Pod - allAllocatedCPUs := cpuset.New() + // CPU IDs are per-node; track allocations per node so we don't treat same IDs on different nodes as overlapping. + allAllocatedCPUsByNode := make(map[string]cpuset.CPUSet) claimTemplate := resourcev1.ResourceClaimTemplate{ ObjectMeta: metav1.ObjectMeta{ @@ -205,25 +219,58 @@ var _ = ginkgo.Describe("CPU Allocation", ginkgo.Serial, ginkgo.Ordered, ginkgo. fixture.By("Verifying CPU allocations for each exclusive pod") for i, pod := range exclPods { - alloc := getTesterPodCPUAllocation(fxt.K8SClientset, ctx, pod) + var alloc CPUAllocation + nodeName := pod.Spec.NodeName + gomega.Eventually(func() error { + alloc = getTesterPodCPUAllocation(fxt.K8SClientset, ctx, pod) + if alloc.CPUAssigned.Size() != cpusPerClaim { + return fmt.Errorf("pod %d: got %d CPUs, want %d", i, alloc.CPUAssigned.Size(), cpusPerClaim) + } + availableOnNode := availableCPUsByNode[nodeName] + if availableOnNode.Size() == 0 { + availableOnNode = availableCPUs + } + if !alloc.CPUAssigned.IsSubsetOf(availableOnNode) { + return fmt.Errorf("pod %d on node %s: CPUs %s outside node available set %s", i, nodeName, alloc.CPUAssigned.String(), availableOnNode.String()) + } + allocatedOnNode := allAllocatedCPUsByNode[nodeName] + if allocatedOnNode.Intersection(alloc.CPUAssigned).Size() != 0 { + return fmt.Errorf("pod %d: overlapping CPUs with previous pods on node %s", i, nodeName) + } + return nil + }).WithTimeout(2*time.Minute).WithPolling(5*time.Second).Should(gomega.Succeed(), "exclusive pod %d allocation did not stabilize", i) fxt.Log.Info("Checking exclusive CPU allocation", "pod", e2epod.Identify(pod), "cpuAllocated", alloc.CPUAssigned.String()) - gomega.Expect(alloc.CPUAssigned.Size()).To(gomega.Equal(cpusPerClaim), "Pod %d did 
not get %d CPUs", i, cpusPerClaim) - gomega.Expect(alloc.CPUAssigned.IsSubsetOf(availableCPUs)).To(gomega.BeTrue(), "Pod %d got CPUs outside available set", i) - gomega.Expect(allAllocatedCPUs.Intersection(alloc.CPUAssigned).Size()).To(gomega.Equal(0), "Pod %d has overlapping CPUs", i) - allAllocatedCPUs = allAllocatedCPUs.Union(alloc.CPUAssigned) + allAllocatedCPUsByNode[nodeName] = allAllocatedCPUsByNode[nodeName].Union(alloc.CPUAssigned) + } + var totalAllocated int + for _, cpus := range allAllocatedCPUsByNode { + totalAllocated += cpus.Size() + } + gomega.Expect(totalAllocated).To(gomega.Equal(numPods * cpusPerClaim)) + rootFxt.Log.Info("All exclusive allocation", "byNode", allAllocatedCPUsByNode, "expected Shared CPUs on target", availableCPUs.Difference(allAllocatedCPUsByNode[targetNode.Name]).String()) + + fixture.By("checking the shared pool does not include anymore the exclusively allocated CPUs on each node with exclusive pods") + var shrPod2 *v1.Pod + var otherNodeSharedPods []*v1.Pod + for nodeName := range allAllocatedCPUsByNode { + availableOnNode := availableCPUsByNode[nodeName] + if availableOnNode.Size() == 0 { + availableOnNode = availableCPUs + } + expectedSharedOnNode := availableOnNode.Difference(allAllocatedCPUsByNode[nodeName]) + if nodeName == targetNode.Name { + fixture.By("creating a second best-effort reference pod on target node") + shrPod2 = mustCreateBestEffortPod(ctx, fxt, targetNode.Name, dracpuTesterImage) + verifySharedPoolMatches(ctx, fxt, shrPod2, expectedSharedOnNode) + ginkgo.By("checking the CPU pool of the best-effort pod created before the pods with CPU resource claims") + verifySharedPoolMatches(ctx, fxt, shrPod1, expectedSharedOnNode) + continue + } + fixture.By(fmt.Sprintf("creating best-effort pod on node %s to verify shared pool", nodeName)) + shrPodOther := mustCreateBestEffortPod(ctx, fxt, nodeName, dracpuTesterImage) + verifySharedPoolMatches(ctx, fxt, shrPodOther, expectedSharedOnNode) + otherNodeSharedPods = 
append(otherNodeSharedPods, shrPodOther) } - gomega.Expect(allAllocatedCPUs.Size()).To(gomega.Equal(numPods * cpusPerClaim)) - rootFxt.Log.Info("All exclusive allocation", "pod", "exclusive CPUs", allAllocatedCPUs.String(), "expected Shared CPUs", availableCPUs.Difference(allAllocatedCPUs).String()) - - fixture.By("checking the shared pool does not include anymore the exclusively allocated CPUs") - expectedSharedCPUs := availableCPUs.Difference(allAllocatedCPUs) - - fixture.By("creating a second best-effort reference pod") - shrPod2 := mustCreateBestEffortPod(ctx, fxt, targetNode.Name, dracpuTesterImage) - verifySharedPoolMatches(ctx, fxt, shrPod2, expectedSharedCPUs) - - ginkgo.By("checking the CPU pool of the best-effort pod created before the pods with CPU resource claims") - verifySharedPoolMatches(ctx, fxt, shrPod1, expectedSharedCPUs) fixture.By("deleting the pods with exclusive CPUs") for _, pod := range exclPods { @@ -231,7 +278,18 @@ var _ = ginkgo.Describe("CPU Allocation", ginkgo.Serial, ginkgo.Ordered, ginkgo. 
} verifySharedPoolMatches(ctx, fxt, shrPod1, availableCPUs) - verifySharedPoolMatches(ctx, fxt, shrPod2, availableCPUs) + if shrPod2 != nil { + verifySharedPoolMatches(ctx, fxt, shrPod2, availableCPUs) + } + for _, shrPodOther := range otherNodeSharedPods { + nodeName := shrPodOther.Spec.NodeName + availableOnNode := availableCPUsByNode[nodeName] + if availableOnNode.Size() == 0 { + availableOnNode = availableCPUs + } + verifySharedPoolMatches(ctx, fxt, shrPodOther, availableOnNode) + gomega.Expect(e2epod.DeleteSync(ctx, fxt.K8SClientset, shrPodOther)).To(gomega.Succeed(), "cannot delete helper pod %s", e2epod.Identify(shrPodOther)) + } }) }) }) @@ -248,5 +306,5 @@ func verifySharedPoolMatches(ctx context.Context, fxt *fixture.Fixture, sharedPo return fmt.Errorf("shared CPUs mismatch: expected %v got %v", expectedSharedCPUs.String(), sharedAllocUpdated.CPUAssigned.String()) } return nil - }).WithTimeout(1*time.Minute).WithPolling(5*time.Second).Should(gomega.Succeed(), "the best-effort tester pod %s does not have access to the exclusively allocated CPUs", e2epod.Identify(sharedPod)) + }).WithTimeout(2*time.Minute).WithPolling(5*time.Second).Should(gomega.Succeed(), "the best-effort tester pod %s CPU pool did not match expected %s", e2epod.Identify(sharedPod), expectedSharedCPUs.String()) } diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index d283450..981d45e 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -66,7 +66,7 @@ func BeFailedToCreate(fxt *fixture.Fixture) types.GomegaMatcher { return false, nil } if cntSt.State.Waiting.Reason != reasonCreateContainerError { - lh.Info("container terminated for different reason", "containerName", cntSt.Name, "reason", cntSt.State.Terminated.Reason) + lh.Info("container waiting for different reason", "containerName", cntSt.Name, "reason", cntSt.State.Waiting.Reason) return false, nil } lh.Info("container creation error", "containerName", cntSt.Name) diff --git 
a/test/e2e/sharing_test.go b/test/e2e/sharing_test.go index 6b4b85e..878ffdb 100644 --- a/test/e2e/sharing_test.go +++ b/test/e2e/sharing_test.go @@ -147,7 +147,7 @@ var _ = ginkgo.Describe("Claim sharing", ginkgo.Serial, ginkgo.Ordered, ginkgo.C gomega.Expect(fxt.Teardown(ctx)).To(gomega.Succeed()) }) - ginkgo.It("should fail to run pods which share a claim", func(ctx context.Context) { + ginkgo.It("should fail to run pods which share a claim", ginkgo.SpecTimeout(5*time.Minute), func(ctx context.Context) { testPod := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Namespace: fxt.Namespace.Name, @@ -196,10 +196,10 @@ var _ = ginkgo.Describe("Claim sharing", ginkgo.Serial, ginkgo.Ordered, ginkgo.C return nil } return pod - }).WithTimeout(time.Minute).WithPolling(2 * time.Second).Should(BeFailedToCreate(fxt)) + }).WithTimeout(2 * time.Minute).WithPolling(2 * time.Second).Should(BeFailedToCreate(fxt)) }) - ginkgo.It("should fail to run a pod with multiple containers which share a claim", ginkgo.Label("negative"), func(ctx context.Context) { + ginkgo.It("should fail to run a pod with multiple containers which share a claim", ginkgo.Label("negative"), ginkgo.SpecTimeout(5*time.Minute), func(ctx context.Context) { testPod := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Namespace: fxt.Namespace.Name, @@ -259,7 +259,7 @@ var _ = ginkgo.Describe("Claim sharing", ginkgo.Serial, ginkgo.Ordered, ginkgo.C return nil } return pod - }).WithTimeout(time.Minute).WithPolling(2 * time.Second).Should(BeFailedToCreate(fxt)) + }).WithTimeout(2 * time.Minute).WithPolling(2 * time.Second).Should(BeFailedToCreate(fxt)) }) }) }) diff --git a/test/image/dracputester/app.go b/test/image/dracputester/app.go index aa0383b..a42126e 100644 --- a/test/image/dracputester/app.go +++ b/test/image/dracputester/app.go @@ -23,10 +23,10 @@ import ( "os" "os/signal" "path/filepath" - "runtime" "strings" "time" + "github.com/kubernetes-sigs/dra-driver-cpu/pkg/cpuinfo" 
"github.com/kubernetes-sigs/dra-driver-cpu/test/pkg/discovery" "golang.org/x/sys/unix" "k8s.io/utils/cpuset" @@ -35,6 +35,9 @@ import ( const ( cgroupPath = "fs/cgroup" cpusetFile = "cpuset.cpus.effective" + // affinityScanMax is the upper bound when scanning sched_getaffinity if topology is unavailable. + // Using runtime.NumCPU() would miss CPUs when the cgroup cpuset is non-contiguous (e.g. 2-5,9-13). + affinityScanMax = 2048 ) func cpuSetPath(sysRoot string) string { @@ -49,20 +52,49 @@ func cpuSet(sysRoot string) (cpuset.CPUSet, error) { return cpuset.Parse(strings.TrimSpace(string(data))) } +// affinityMask is satisfied by unix.CPUSet and test doubles. +type affinityMask interface { + IsSet(i int) bool +} + +// affinityFromMask scans the mask from 0 to maxCPUID (exclusive) and returns the set of set CPU IDs. +// If maxCPUID <= 0, affinityScanMax is used. +func affinityFromMask(mask affinityMask, maxCPUID int) cpuset.CPUSet { + if maxCPUID <= 0 { + maxCPUID = affinityScanMax + } + var allowedCPUs []int + for i := 0; i < maxCPUID; i++ { + if mask.IsSet(i) { + allowedCPUs = append(allowedCPUs, i) + } + } + return cpuset.New(allowedCPUs...) 
+} + +func affinityScanBoundFromTopology(topo *cpuinfo.CPUTopology) int { + if topo == nil || topo.NumCPUs == 0 { + return affinityScanMax + } + cpus := topo.CPUDetails.CPUs() + if cpus.Size() == 0 { + return topo.NumCPUs + } + list := cpus.List() + return list[len(list)-1] + 1 +} + func getAffinity() (cpuset.CPUSet, error) { var unixCS unix.CPUSet err := unix.SchedGetaffinity(os.Getpid(), &unixCS) if err != nil { return cpuset.New(), err } - - var allowedCPUs []int - for i := 0; i < runtime.NumCPU(); i++ { - if unixCS.IsSet(i) { - allowedCPUs = append(allowedCPUs, i) - } + maxCPUID := 0 + if topo, err := cpuinfo.NewSystemCPUInfo().GetCPUTopology(); err == nil { + maxCPUID = affinityScanBoundFromTopology(topo) } - return cpuset.New(allowedCPUs...), nil + return affinityFromMask(&unixCS, maxCPUID), nil } func main() { diff --git a/test/image/dracputester/app_test.go b/test/image/dracputester/app_test.go new file mode 100644 index 0000000..106b78a --- /dev/null +++ b/test/image/dracputester/app_test.go @@ -0,0 +1,108 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "testing" + + "github.com/kubernetes-sigs/dra-driver-cpu/pkg/cpuinfo" + "k8s.io/utils/cpuset" +) + +type maskFromSet struct{ cpus cpuset.CPUSet } + +func (m maskFromSet) IsSet(i int) bool { return m.cpus.Contains(i) } + +func TestAffinityFromMask(t *testing.T) { + tests := []struct { + name string + mask affinityMask + maxCPUID int + want cpuset.CPUSet + }{ + { + name: "empty", + mask: maskFromSet{cpuset.New()}, + maxCPUID: 0, + want: cpuset.New(), + }, + { + name: "single CPU", + mask: maskFromSet{cpuset.New(3)}, + maxCPUID: 0, + want: cpuset.New(3), + }, + { + name: "split set 2-5 and 9-13 (bug case: high IDs beyond NumCPU())", + mask: maskFromSet{mustParse(t, "2-5,9-13")}, + maxCPUID: 0, + want: mustParse(t, "2-5,9-13"), + }, + { + name: "high CPU IDs only", + mask: maskFromSet{cpuset.New(100, 101, 200)}, + maxCPUID: 0, + want: cpuset.New(100, 101, 200), + }, + { + name: "explicit bound 20", + mask: maskFromSet{cpuset.New(2, 5, 9)}, + maxCPUID: 20, + want: cpuset.New(2, 5, 9), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := affinityFromMask(tt.mask, tt.maxCPUID) + if !got.Equals(tt.want) { + t.Errorf("affinityFromMask() = %s, want %s", got.String(), tt.want.String()) + } + }) + } +} + +func TestAffinityScanBoundFromTopology(t *testing.T) { + t.Run("nil returns default", func(t *testing.T) { + if got := affinityScanBoundFromTopology(nil); got != affinityScanMax { + t.Errorf("affinityScanBoundFromTopology(nil) = %d, want %d", got, affinityScanMax) + } + }) + t.Run("empty CPUDetails uses NumCPUs", func(t *testing.T) { + topo := &cpuinfo.CPUTopology{NumCPUs: 16, CPUDetails: cpuinfo.CPUDetails{}} + if got := affinityScanBoundFromTopology(topo); got != 16 { + t.Errorf("affinityScanBoundFromTopology() = %d, want 16", got) + } + }) + t.Run("max CPU ID plus one", func(t *testing.T) { + topo := &cpuinfo.CPUTopology{ + NumCPUs: 4, + CPUDetails: cpuinfo.CPUDetails{2: cpuinfo.CPUInfo{}, 3: 
cpuinfo.CPUInfo{}, 5: cpuinfo.CPUInfo{}, 9: cpuinfo.CPUInfo{}}, + } + if got := affinityScanBoundFromTopology(topo); got != 10 { + t.Errorf("affinityScanBoundFromTopology() = %d, want 10", got) + } + }) +} + +func mustParse(t *testing.T, s string) cpuset.CPUSet { + t.Helper() + cpus, err := cpuset.Parse(s) + if err != nil { + t.Fatalf("cpuset.Parse(%q): %v", s, err) + } + return cpus +} diff --git a/test/pkg/pod/pod.go b/test/pkg/pod/pod.go index f6c6f9f..a990297 100644 --- a/test/pkg/pod/pod.go +++ b/test/pkg/pod/pod.go @@ -75,11 +75,50 @@ func RunToCompletion(ctx context.Context, cs kubernetes.Interface, pod *v1.Pod) func WaitToBeRunning(ctx context.Context, cs kubernetes.Interface, podNamespace, podName string) error { phase, err := WaitForPhase(ctx, cs, podNamespace, podName, v1.PodRunning) if err != nil { - return fmt.Errorf("pod=%s/%s is not Running; phase=%q; %w", podNamespace, podName, phase, err) + hint := formatPodEventsHint(ctx, cs, podNamespace, podName) + return fmt.Errorf("pod=%s/%s is not Running; phase=%q; %w%s", podNamespace, podName, phase, err, hint) } return nil } +// formatPodEventsHint returns a short hint (pod events, newest first) for debugging Pending pods. Safe to call on failure. +func formatPodEventsHint(ctx context.Context, cs kubernetes.Interface, podNamespace, podName string) string { + eventList, err := cs.CoreV1().Events(podNamespace).List(ctx, metav1.ListOptions{ + FieldSelector: "involvedObject.name=" + podName, + Limit: 15, + }) + if err != nil || len(eventList.Items) == 0 { + return "" + } + var b strings.Builder + b.WriteString("; pod events (newest first): ") + const maxHintLen = 600 + for i := len(eventList.Items) - 1; i >= 0 && b.Len() < maxHintLen; i-- { + e := eventList.Items[i] + if b.Len() > 30 { + b.WriteString("; ") + } + b.WriteString("[") + b.WriteString(string(e.Type)) + b.WriteString("] ") + b.WriteString(e.Reason) + if e.Message != "" { + msg := e.Message + if len(msg) > 120 { + msg = msg[:117] + "..." 
+ } + b.WriteString(": ") + b.WriteString(msg) + } + if !e.LastTimestamp.IsZero() { + b.WriteString(" (") + b.WriteString(e.LastTimestamp.Format("15:04:05")) + b.WriteString(")") + } + } + return b.String() +} + func WaitToBeDeleted(ctx context.Context, cs kubernetes.Interface, podNamespace, podName string) error { immediate := true err := wait.PollUntilContextTimeout(ctx, PollInterval, PollTimeout, immediate, func(ctx2 context.Context) (done bool, err error) {