fix(report): prevent state change loss during in-flight ReportState

- Consume stateChanged atomically with the state snapshot under a single Lock
- Restore stateChanged on UpdateTask error so the change is not silently lost
- Collapse the early-return check into the same Lock to avoid triple locking
- Add tests covering the in-flight Fire race and the error-restore path

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Bo-Yi Wu
2026-04-12 11:24:22 +08:00
parent 7031b3507d
commit 2931fe9e48
2 changed files with 116 additions and 13 deletions

View File

@@ -464,18 +464,16 @@ func (r *Reporter) ReportState(reportResult bool) error {
return true
})
r.stateMu.RLock()
changed := r.stateChanged
r.stateMu.RUnlock()
// Early return avoids the expensive proto.Clone on the common no-op path.
if !reportResult && !changed && len(outputs) == 0 {
// Consume stateChanged atomically with the snapshot, before UpdateTask runs,
// so a concurrent Fire() during the call isn't silently lost; the flag is
// restored below if UpdateTask returns an error.
r.stateMu.Lock()
if !reportResult && !r.stateChanged && len(outputs) == 0 {
r.stateMu.Unlock()
return nil
}
r.stateMu.RLock()
state := proto.Clone(r.state).(*runnerv1.TaskState)
r.stateMu.RUnlock()
r.stateChanged = false
r.stateMu.Unlock()
if !reportResult {
state.Result = runnerv1.Result_RESULT_UNSPECIFIED
@@ -486,13 +484,12 @@ func (r *Reporter) ReportState(reportResult bool) error {
Outputs: outputs,
}))
if err != nil {
r.stateMu.Lock()
r.stateChanged = true
r.stateMu.Unlock()
return err
}
r.stateMu.Lock()
r.stateChanged = false
r.stateMu.Unlock()
for _, k := range resp.Msg.SentOutputs {
r.outputs.Store(k, struct{}{})
}