Skip to content

Commit 52652d0

Browse files
Merge pull request #1172 from perdasilva/ocpbugs-67301
OCPBUGS-67301: Fix TOCTOU race condition in ensureInstallPlan (#3682)
2 parents 12fbe1b + 3b968a1 commit 52652d0

File tree

4 files changed

+125
-12
lines changed

4 files changed

+125
-12
lines changed

staging/operator-lifecycle-manager/pkg/controller/operators/catalog/operator.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,12 +1639,6 @@ func (o *Operator) ensureInstallPlan(logger *logrus.Entry, namespace string, gen
16391639
return nil, nil
16401640
}
16411641

1642-
// Check if any existing installplans are creating the same resources
1643-
installPlans, err := o.listInstallPlans(namespace)
1644-
if err != nil {
1645-
return nil, err
1646-
}
1647-
16481642
// There are multiple(2) worker threads process the namespaceQueue.
16491643
// Both worker can work at the same time when 2 separate updates are made for the namespace.
16501644
// The following sequence causes 2 installplans are created for a subscription
@@ -1665,6 +1659,13 @@ func (o *Operator) ensureInstallPlan(logger *logrus.Entry, namespace string, gen
16651659
o.muInstallPlan.Lock()
16661660
defer o.muInstallPlan.Unlock()
16671661

1662+
// Check if any existing installplans are creating the same resources
1663+
// This must be done inside the lock to prevent TOCTOU race condition
1664+
installPlans, err := o.listInstallPlans(namespace)
1665+
if err != nil {
1666+
return nil, err
1667+
}
1668+
16681669
for _, installPlan := range installPlans {
16691670
if installPlan.Spec.Generation == gen {
16701671
return reference.GetReference(installPlan)

staging/operator-lifecycle-manager/pkg/controller/operators/catalog/operator_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"reflect"
1111
"strings"
12+
"sync"
1213
"testing"
1314
"testing/quick"
1415
"time"
@@ -2462,3 +2463,107 @@ func hasExpectedCondition(ip *v1alpha1.InstallPlan, expectedCondition v1alpha1.I
24622463
}
24632464
return false
24642465
}
2466+
2467+
// TestEnsureInstallPlanConcurrency tests that concurrent calls to ensureInstallPlan
2468+
// do not create duplicate InstallPlans for the same subscription.
2469+
// This test verifies the fix for a TOCTOU race condition where multiple worker threads
2470+
// could create duplicate InstallPlans if they both check for existing plans before either
2471+
// has created one.
2472+
func TestEnsureInstallPlanConcurrency(t *testing.T) {
2473+
namespace := "test-ns"
2474+
gen := 1
2475+
numGoroutines := 10
2476+
2477+
ctx, cancel := context.WithCancel(context.TODO())
2478+
defer cancel()
2479+
2480+
// Create a fake operator
2481+
op, err := NewFakeOperator(ctx, namespace, []string{namespace})
2482+
require.NoError(t, err)
2483+
2484+
// Create a test subscription
2485+
sub := &v1alpha1.Subscription{
2486+
ObjectMeta: metav1.ObjectMeta{
2487+
Name: "test-sub",
2488+
Namespace: namespace,
2489+
UID: types.UID("test-uid"),
2490+
},
2491+
Spec: &v1alpha1.SubscriptionSpec{
2492+
Package: "test-package",
2493+
},
2494+
}
2495+
2496+
// Create test steps for the InstallPlan
2497+
steps := []*v1alpha1.Step{
2498+
{
2499+
Resolving: "test-csv",
2500+
Resource: v1alpha1.StepResource{
2501+
CatalogSource: "test-catalog",
2502+
CatalogSourceNamespace: namespace,
2503+
Group: "operators.coreos.com",
2504+
Version: "v1alpha1",
2505+
Kind: "ClusterServiceVersion",
2506+
Name: "test-csv",
2507+
Manifest: toManifest(t, csv("test-csv", namespace, nil, nil)),
2508+
},
2509+
Status: v1alpha1.StepStatusUnknown,
2510+
},
2511+
}
2512+
2513+
// Use WaitGroup to synchronize goroutines
2514+
var wg sync.WaitGroup
2515+
// Use a channel to collect results
2516+
results := make(chan *corev1.ObjectReference, numGoroutines)
2517+
// Use a sync.Once-like mechanism to start all goroutines at roughly the same time
2518+
startBarrier := make(chan struct{})
2519+
2520+
logger := logrus.NewEntry(logrus.New())
2521+
2522+
// Launch multiple goroutines that will call ensureInstallPlan concurrently
2523+
for i := 0; i < numGoroutines; i++ {
2524+
wg.Add(1)
2525+
go func() {
2526+
defer wg.Done()
2527+
// Wait for the start signal
2528+
<-startBarrier
2529+
2530+
// Call ensureInstallPlan
2531+
ref, err := op.ensureInstallPlan(logger, namespace, gen, []*v1alpha1.Subscription{sub}, v1alpha1.ApprovalAutomatic, steps, nil)
2532+
require.NoError(t, err)
2533+
results <- ref
2534+
}()
2535+
}
2536+
2537+
// Start all goroutines
2538+
close(startBarrier)
2539+
2540+
// Wait for all goroutines to complete
2541+
wg.Wait()
2542+
close(results)
2543+
2544+
// Collect all results
2545+
var refs []*corev1.ObjectReference
2546+
for ref := range results {
2547+
refs = append(refs, ref)
2548+
}
2549+
2550+
// Verify we got the expected number of results
2551+
require.Len(t, refs, numGoroutines, "should have received results from all goroutines")
2552+
2553+
// Verify all refs point to the same InstallPlan
2554+
firstRef := refs[0]
2555+
for i, ref := range refs {
2556+
require.Equal(t, firstRef.Name, ref.Name, "goroutine %d returned different InstallPlan name", i)
2557+
require.Equal(t, firstRef.Namespace, ref.Namespace, "goroutine %d returned different InstallPlan namespace", i)
2558+
require.Equal(t, firstRef.UID, ref.UID, "goroutine %d returned different InstallPlan UID", i)
2559+
}
2560+
2561+
// Verify only one InstallPlan was created in the cluster
2562+
ipList, err := op.client.OperatorsV1alpha1().InstallPlans(namespace).List(ctx, metav1.ListOptions{})
2563+
require.NoError(t, err)
2564+
require.Len(t, ipList.Items, 1, "exactly one InstallPlan should have been created")
2565+
2566+
// Verify the created InstallPlan has the correct generation
2567+
createdIP := &ipList.Items[0]
2568+
require.Equal(t, gen, createdIP.Spec.Generation, "InstallPlan should have the correct generation")
2569+
}

staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5853,6 +5853,12 @@ func RequireObjectsInCache(t *testing.T, lister operatorlister.OperatorLister, n
58535853
fetched, err = lister.RbacV1().RoleBindingLister().RoleBindings(namespace).Get(o.GetName())
58545854
case *v1alpha1.ClusterServiceVersion:
58555855
fetched, err = lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(namespace).Get(o.GetName())
5856+
if err != nil {
5857+
if apierrors.IsNotFound(err) {
5858+
return err
5859+
}
5860+
return errors.Join(err, fmt.Errorf("namespace: %v, error: %v", namespace, err))
5861+
}
58565862
// We don't care about finalizers
58575863
object.(*v1alpha1.ClusterServiceVersion).Finalizers = nil
58585864
fetched.(*v1alpha1.ClusterServiceVersion).Finalizers = nil

vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/catalog/operator.go

Lines changed: 7 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)