From 22ceeaaff9be3e71c5473aeabccdb02705565190 Mon Sep 17 00:00:00 2001 From: Robert Carr Date: Tue, 8 Mar 2022 13:13:22 -0800 Subject: SurfaceFlinger/LayerRenderArea: Hold lock while modifying hierarchy Multiple stability reports from OEMs are reporting a crash in LayerRenderArea. In particular this occurs when destroying the screenshotParentLayer container layer used by LayerRenderArea. This code executes on the main thread without a lock. Our rules for locking in SurfaceFlinger are like this: 1. The main thread will hold the lock when writing to the state, and be the only writer 2. Anyone can read the state if they hold the lock 3. Main thread can read from the state without the lock since it is the only writer. LayerRenderArea's reparentForDrawing operation violates these rules by updating mDrawingParent without holding the lock. If there were some other binder thread which was accessing the hierarchy (say calling getAlpha on a layer in the hierarchy descending from our screenshot layer), we could have a sequence like this: 1. Other thread calls mDrawingParent.promote(), on the wp we provided constructing ReparentForDrawing 2. At the same time we destroy ReparentForDrawing and overwrite the wp 3. This causes wp::~wp and wp::promote to interleave 4. While the wp counters themselves are atomic, remember the object itself is not locked, and so after ~wp the counters that we think are stored in our ~wp could be anything and so we could promote without properly incremeting the reference ccount 5. When the sp returned from promote is destroyed, it could then destroy the layer, and when our original sp from LayerRenderArea is destroyed, we then crash (which is what we see in the traces). It's hard to say what this other thread is. Over time we have tried to move usage of the Layer hierarchy off the Binder threads to achieve a totally lock-free model. At the moment, I can't find spots in tip-of-tree which would race with mDrawingParent in this fashion. The reports are from S, and from OEMs only so it's possible it relates to an OEM modification, or an issue we have already fixed. Despite this we should have a consistent locking model, and until we are ready to get rid of mStateLock, apply it's usage consistently with our three rules of locking. Bug: 220176775 Bug: 223069308 Bug: 223081111 Change-Id: I89c5add585f96b7de1f1b21f76b6ea0c6b0de127 --- services/surfaceflinger/LayerRenderArea.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'services/surfaceflinger/LayerRenderArea.cpp') diff --git a/services/surfaceflinger/LayerRenderArea.cpp b/services/surfaceflinger/LayerRenderArea.cpp index a1e14559e9..896f25404d 100644 --- a/services/surfaceflinger/LayerRenderArea.cpp +++ b/services/surfaceflinger/LayerRenderArea.cpp @@ -26,18 +26,12 @@ namespace android { namespace { -struct ReparentForDrawing { - const sp& oldParent; - - ReparentForDrawing(const sp& oldParent, const sp& newParent, - const Rect& drawingBounds) - : oldParent(oldParent) { +void reparentForDrawing(const sp& oldParent, const sp& newParent, + const Rect& drawingBounds) { // Compute and cache the bounds for the new parent layer. newParent->computeBounds(drawingBounds.toFloatRect(), ui::Transform(), - 0.f /* shadowRadius */); + 0.f /* shadowRadius */); oldParent->setChildrenDrawingParent(newParent); - } - ~ReparentForDrawing() { oldParent->setChildrenDrawingParent(oldParent); } }; } // namespace @@ -114,11 +108,19 @@ void LayerRenderArea::render(std::function drawLayers) { } else { // In the "childrenOnly" case we reparent the children to a screenshot // layer which has no properties set and which does not draw. + // We hold the statelock as the reparent-for-drawing operation modifies the + // hierarchy and there could be readers on Binder threads, like dump. sp screenshotParentLayer = mFlinger.getFactory().createContainerLayer( - {&mFlinger, nullptr, "Screenshot Parent"s, 0, LayerMetadata()}); - - ReparentForDrawing reparent(mLayer, screenshotParentLayer, sourceCrop); + {&mFlinger, nullptr, "Screenshot Parent"s, 0, LayerMetadata()}); + { + Mutex::Autolock _l(mFlinger.mStateLock); + reparentForDrawing(mLayer, screenshotParentLayer, sourceCrop); + } drawLayers(); + { + Mutex::Autolock _l(mFlinger.mStateLock); + mLayer->setChildrenDrawingParent(mLayer); + } } } -- cgit v1.2.3-59-g8ed1b