Skip to content

Commit 019d429

Browse files
jcrossleyjenkins
authored and
jenkins
committed
finagle/finagle-core: Add retry context to retried requests
Problem Currently, it's hard to attribute a surge in request volume and failures with a retry storm -- it's a positive feedback loop. In a system with a complex call chain, it can be difficult to know where to look for client-side retries, and these still may not accurately reflect what the server ultimately sees due to multiplicative retries at different levels. Solution Introduce a new context, `c.t.f.Retry`, to signal that a request is a retry. We can then add server-side stats to be able to determine how many retries we receive (and look at the against total request volume). Differential Revision: https://phabricator.twitter.biz/D1228127
1 parent 0f731a3 commit 019d429

File tree

9 files changed

+148
-5
lines changed

9 files changed

+148
-5
lines changed

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ New Features
3636
context per request.
3737
* finagle-netty4: Added support for custom event loop implementations. ``PHAB_ID=D1185136``
3838
* finagle-core: Add low priority offload executor. ``PHAB_ID=D1189064``
39+
* finagle-core: A new context, `c.t.f.Retry` is set for a request if it is a retry. ``PHAB_ID=D1228127``
3940

4041

4142
Bug Fixes

finagle-base-http/src/main/scala/com/twitter/finagle/http/codec/context/HttpContext.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ object HttpContext {
5252
Array[HttpContext](
5353
HttpDeadline,
5454
HttpRequeues,
55-
HttpBackupRequest
55+
HttpBackupRequest,
56+
HttpRetry
5657
)
5758
}
5859

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.twitter.finagle.http.codec.context
2+
3+
import com.twitter.finagle.context.Contexts
4+
import com.twitter.finagle.context.RetryContext
5+
import com.twitter.finagle.context.RetryContext.Retry
6+
import com.twitter.util.Try
7+
8+
private object HttpRetry extends HttpContext {
9+
10+
type ContextKeyType = Retry
11+
val key: Contexts.broadcast.Key[Retry] = RetryContext.Ctx
12+
13+
def toHeader(retry: Retry): String = {
14+
"1"
15+
}
16+
17+
def fromHeader(header: String): Try[Retry] = {
18+
RetryContext.returnRetry
19+
}
20+
}

finagle-base-http/src/test/scala/com/twitter/finagle/http/codec/context/HttpContextTest.scala

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import com.twitter.finagle.context.BackupRequest
55
import com.twitter.finagle.context.Contexts
66
import com.twitter.finagle.context.Deadline
77
import com.twitter.finagle.context.Requeues
8+
import com.twitter.finagle.context.RetryContext
89
import com.twitter.finagle.http.Message
910
import com.twitter.finagle.http.Method
1011
import com.twitter.finagle.http.Request
@@ -157,6 +158,23 @@ class HttpContextTest extends AnyFunSuite {
157158
}
158159
}
159160

161+
test("Retry written matches read") {
162+
val m = newMsg()
163+
assert(!RetryContext.isRetry)
164+
RetryContext.withRetry {
165+
HttpContext.write(m)
166+
assert(RetryContext.isRetry)
167+
168+
Contexts.broadcast.letClearAll {
169+
assert(!RetryContext.isRetry)
170+
171+
HttpContext.read(m) {
172+
assert(RetryContext.isRetry)
173+
}
174+
}
175+
}
176+
}
177+
160178
test("invalid context header value causes context to not be set") {
161179
val m = newMsg()
162180
m.headerMap.set("Finagle-Ctx-com.twitter.finagle.foo", ",,,")
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package com.twitter.finagle.context
2+
3+
import com.twitter.io.Buf
4+
import com.twitter.util.Return
5+
import com.twitter.util.Try
6+
7+
/**
8+
* Value-less context indicating that this request is a Retry. We do not include the retry # because
9+
* it can be misleading. Suppose that we have services A -> B -> C. A makes a request to B, which makes
10+
* a request to C, which fails and is retried 3 times. All 3 retries fail, so A retries its request
11+
* to B. When C receives this request, it's really the 4th retry of the request, but because we cannot
12+
* pass context information back up the stack, we are unable to correctly set the # retries in the
13+
* context -- to A, it's the first retry, and to B, it's the first attempt.
14+
*/
15+
private[finagle] object RetryContext {
16+
17+
final class Retry
18+
19+
private val retry = new Retry
20+
val returnRetry = Return(retry)
21+
22+
private final class Context extends Contexts.broadcast.Key[Retry]("c.t.f.Retry") {
23+
def marshal(value: Retry): Buf = Buf.Empty
24+
25+
def tryUnmarshal(buf: Buf): Try[Retry] = {
26+
returnRetry
27+
}
28+
}
29+
30+
val Ctx: Contexts.broadcast.Key[Retry] = new Context
31+
32+
def isRetry: Boolean = {
33+
Contexts.broadcast.get(Ctx).nonEmpty
34+
}
35+
36+
def withRetry[T](f: => T): T = {
37+
Contexts.broadcast.let(Ctx, retry) {
38+
f
39+
}
40+
}
41+
}

finagle-core/src/main/scala/com/twitter/finagle/service/RetryFilter.scala

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,17 @@ package com.twitter.finagle.service
22

33
import com.twitter.conversions.DurationOps._
44
import com.twitter.finagle.Filter.TypeAgnostic
5+
import com.twitter.finagle.context
56
import com.twitter.finagle.param.HighResTimer
6-
import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver}
7-
import com.twitter.finagle.tracing.{Annotation, Trace, TraceId}
8-
import com.twitter.finagle.{Backoff, FailureFlags, Filter, Service}
7+
import com.twitter.finagle.stats.NullStatsReceiver
8+
import com.twitter.finagle.stats.StatsReceiver
9+
import com.twitter.finagle.tracing.Annotation
10+
import com.twitter.finagle.tracing.Trace
11+
import com.twitter.finagle.tracing.TraceId
12+
import com.twitter.finagle.Backoff
13+
import com.twitter.finagle.FailureFlags
14+
import com.twitter.finagle.Filter
15+
import com.twitter.finagle.Service
916
import com.twitter.util._
1017

1118
object RetryingService {
@@ -101,7 +108,14 @@ class RetryFilter[Req, Rep](
101108
trace.recordRpc("retry")
102109
}
103110

104-
val svcRep = service(req)
111+
val svcRep = if (isRetriedRequest) {
112+
context.RetryContext.withRetry {
113+
service(req)
114+
}
115+
} else {
116+
service(req)
117+
}
118+
105119
if (trace.isActivelyTracing) {
106120
// we always trace the exception
107121
svcRep.respond {
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.twitter.finagle.context
2+
3+
import org.scalatest.funsuite.AnyFunSuite
4+
5+
class RetryContextTest extends AnyFunSuite {
6+
7+
test("Get/Set") {
8+
assert(!RetryContext.isRetry)
9+
RetryContext.withRetry {
10+
assert(RetryContext.isRetry)
11+
}
12+
}
13+
}

finagle-core/src/test/scala/com/twitter/finagle/service/RetryFilterTest.scala

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import com.twitter.finagle.Failure
88
import com.twitter.finagle.FailureFlags
99
import com.twitter.finagle.Service
1010
import com.twitter.finagle.WriteException
11+
import com.twitter.finagle.context.RetryContext
1112
import com.twitter.util._
1213
import org.mockito.ArgumentMatchers.anyObject
1314
import org.mockito.Mockito.times
@@ -92,6 +93,40 @@ class RetryFilterTest extends AnyFunSpec with MockitoSugar with BeforeAndAfter {
9293

9394
describe("RetryFilter") {
9495

96+
it("sets retry context correctly in RetryFilter") {
97+
val timer = new MockTimer
98+
99+
val stats = new InMemoryStatsReceiver()
100+
val service = mock[Service[Int, Int]]
101+
when(service.close(anyObject[Time])) thenReturn Future.Done
102+
when(service(123))
103+
.thenAnswer { _ =>
104+
if (RetryContext.isRetry) Future(321)
105+
else Future.exception(WriteException(new Exception("first attempt")))
106+
}
107+
108+
val policy = RetryPolicy.tries[Try[Nothing]](
109+
2,
110+
{
111+
case Throw(WriteException(_)) => true
112+
})
113+
val filter = new RetryExceptionsFilter[Int, Int](policy, timer, stats)
114+
val retryingService = filter andThen service
115+
116+
Time.withCurrentTimeFrozen { tc =>
117+
val result = retryingService(123)
118+
verify(service, times(1))(123)
119+
assert(!result.isDefined)
120+
121+
tc.advance(1.second)
122+
timer.tick()
123+
124+
verify(service, times(2))(123)
125+
assert(Await.result(result, 5.seconds) == 321)
126+
assert(stats.stat("retries")() == Seq(1))
127+
}
128+
}
129+
95130
it("respects RetryBudget") {
96131
val stats = new InMemoryStatsReceiver()
97132

0 commit comments

Comments
 (0)