Skip to content

Commit d6872e1

Browse files
Tristan Pollittjenkins
Tristan Pollitt
authored and
jenkins
committed
finagle/finagle-zipkin-core: Introduce local tracing implementation with duration filter and sampling
This phab introduces `DurationFilteringTracer`, which can be used to collect traces locally and write them to a file, with a configured sample rate and span duration threshold. The implementation converts Finagle spans to the zipkin2 json format, so that they can be uploaded to the Zipkin ui or Grafana for analysis. For duration filtering, we need to keep sampled spans in memory until either a parent span completes with a duration > than the configured threshold, or the topmost span of the trace is received. I use a Caffeine cache for this, so that we won't run out of memory if the sampling rate is set too high, or if there are orphaned spans. Differential Revision: https://phabricator.twitter.biz/D1182242
1 parent 49feab7 commit d6872e1

File tree

4 files changed

+299
-1
lines changed

4 files changed

+299
-1
lines changed

build.sbt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ val scroogeLibs = thriftLibs ++ Seq("com.twitter" %% "scrooge-core" % releaseVer
8282

8383
val lz4Lib = "org.lz4" % "lz4-java" % "1.8.0"
8484

85+
val zipkinLibs = Seq(
86+
"io.zipkin.java" % "zipkin" % "1.28.1",
87+
"io.zipkin.zipkin2" % "zipkin" % "2.22.1"
88+
)
89+
8590
def util(which: String) =
8691
"com.twitter" %% ("util-" + which) % releaseVersion excludeAll (ExclusionRule(organization =
8792
"junit"),
@@ -482,7 +487,7 @@ lazy val finagleZipkinCore = Project(
482487
libraryDependencies ++= Seq(
483488
util("codec"),
484489
util("core"),
485-
util("stats")) ++ scroogeLibs ++ jacksonLibs
490+
util("stats")) ++ scroogeLibs ++ jacksonLibs ++ zipkinLibs
486491
).dependsOn(finagleCore % "compile->compile;test->test", finagleThrift)
487492

488493
lazy val finagleZipkinScribe = Project(

finagle-zipkin-core/src/main/scala/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ scala_library(
1212
"3rdparty/jvm/com/fasterxml/jackson/core:jackson-core",
1313
"3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind",
1414
"3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala",
15+
"3rdparty/jvm/io/zipkin/java:zipkin-core",
16+
"3rdparty/jvm/io/zipkin/java:zipkin2",
1517
"3rdparty/jvm/org/apache/thrift:libthrift",
1618
"finagle/finagle-core/src/main",
1719
"finagle/finagle-thrift/src/main/java",
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
package com.twitter.finagle.zipkin.core
2+
3+
import com.github.benmanes.caffeine.cache.Caffeine
4+
import com.github.benmanes.caffeine.cache.RemovalCause
5+
import com.twitter.finagle.stats.NullStatsReceiver
6+
import com.twitter.finagle.stats.StatsReceiver
7+
import com.twitter.finagle.tracing.Record
8+
import com.twitter.finagle.tracing.TraceId
9+
import com.twitter.finagle.zipkin.core.DurationFilteringTracer.BitMask
10+
import com.twitter.finagle.zipkin.core.DurationFilteringTracer.Multiplier
11+
import com.twitter.finagle.zipkin.core.DurationFilteringTracer.SomeFalse
12+
import com.twitter.finagle.zipkin.core.DurationFilteringTracer.SomeTrue
13+
import com.twitter.finagle.zipkin.core.DurationFilteringTracer.salt
14+
import com.twitter.util.Duration
15+
import com.twitter.util.Future
16+
import java.io.FileOutputStream
17+
import java.lang.ThreadLocal
18+
import java.util.concurrent.ConcurrentMap
19+
import org.apache.thrift.TSerializer
20+
import scala.util.Random
21+
import zipkin.internal.ApplyTimestampAndDuration
22+
import zipkin.Codec
23+
import zipkin2.codec.SpanBytesDecoder
24+
import java.lang.{Long => JLong}
25+
import scala.util.Using
26+
import zipkin2.codec.SpanBytesEncoder
27+
28+
object DurationFilteringTracer {
29+
// Use same sampling params here as in com.twitter.finagle.zipkin.core.Sampler
30+
private val Multiplier = (1 << 24).toFloat
31+
private val BitMask = (Multiplier - 1).toInt
32+
private val salt = new Random().nextInt()
33+
34+
private val SomeTrue = Some(true)
35+
private val SomeFalse = Some(false)
36+
}
37+
38+
class DurationFilteringTracer(
39+
duration: Duration,
40+
samplingRate: Float,
41+
outputPath: String,
42+
maxInFlightTraces: Int = 2000,
43+
statsReceiver: StatsReceiver = NullStatsReceiver)
44+
extends RawZipkinTracer {
45+
46+
if (samplingRate < 0 || samplingRate > 1) {
47+
throw new IllegalArgumentException(
48+
"Sample rate not within the valid range of 0-1, was " + samplingRate
49+
)
50+
}
51+
52+
private[this] val persistedSpansCounter = statsReceiver.counter("persistedSpans")
53+
private[this] val evictions = statsReceiver.counter("evictions")
54+
55+
private[this] val thriftSerialiser = ThreadLocal.withInitial(() => new TSerializer())
56+
57+
// map from TraceID -> spans within that trace
58+
private[this] val spanRoots: ConcurrentMap[Long, List[zipkin.Span]] = Caffeine
59+
.newBuilder()
60+
.asInstanceOf[Caffeine[Long, List[zipkin.Span]]]
61+
.maximumSize(maxInFlightTraces)
62+
.evictionListener((_: Long, v: Seq[zipkin.Span], _: RemovalCause) => {
63+
evictions.incr()
64+
})
65+
.build[Long, List[zipkin.Span]].asMap()
66+
67+
// sentinel value that will get set for a trace ID when we've seen at least one span in that trace
68+
// with duration >= threshold
69+
private[this] val durationThresholdMetSentinel = List[zipkin.Span]()
70+
71+
val cacheSizeGauge = statsReceiver.addGauge("cacheSize")(spanRoots.size().floatValue())
72+
73+
override def record(record: Record): Unit = {
74+
if (sampleTrace(record.traceId).contains(true)) {
75+
super.record(record)
76+
}
77+
}
78+
79+
override def sampleTrace(traceId: TraceId): Option[Boolean] = {
80+
// Same as in com.twitter.finagle.zipkin.core.Sampler, except here we don't check if
81+
// the traceId has already had Some(false) set, since we want to consider all traceIds
82+
if (((JLong.hashCode(traceId.traceId.toLong) ^ salt) & BitMask) < samplingRate * Multiplier)
83+
SomeTrue
84+
else
85+
SomeFalse
86+
}
87+
88+
override def getSampleRate: Float = samplingRate
89+
90+
override def sendSpans(spans: Seq[Span]): Future[Unit] = {
91+
spans.map(convertToZipkinSpan).foreach { span =>
92+
if (span.duration >= duration.inMicroseconds) {
93+
val existingSpansForTrace = spanRoots.put(span.traceId, durationThresholdMetSentinel)
94+
persistSpans(span, existingSpansForTrace)
95+
} else {
96+
val existingSpansForTrace = spanRoots.compute(
97+
span.traceId,
98+
{
99+
case (_, null) => List(span) // this is the first span for the trace
100+
case (_, v) if v.eq(durationThresholdMetSentinel) =>
101+
durationThresholdMetSentinel // duration threshold has already been met
102+
case (_, v) =>
103+
v.+:(span) // there are existing spans, but duration threshold not yet met
104+
}
105+
)
106+
107+
if (existingSpansForTrace.eq(durationThresholdMetSentinel)) {
108+
persistSpans(span, List.empty)
109+
}
110+
}
111+
}
112+
113+
Future.Done
114+
}
115+
116+
override def isActivelyTracing(traceId: TraceId): Boolean = sampleTrace(traceId).contains(true)
117+
118+
private[this] def convertToZipkinSpan(span: Span): zipkin.Span = {
119+
val serialisedBytes = thriftSerialiser.get().serialize(span.toThrift)
120+
val zipkinV1ThriftSpan = zipkin.Codec.THRIFT.readSpan(serialisedBytes)
121+
ApplyTimestampAndDuration.apply(zipkinV1ThriftSpan)
122+
}
123+
124+
private[this] def persistSpans(parent: zipkin.Span, children: Seq[zipkin.Span]): Unit = {
125+
val spansToPersist = if (children != null) children :+ parent else Seq(parent)
126+
persistedSpansCounter.incr(spansToPersist.size)
127+
Using(new FileOutputStream(outputPath, true)) { fileOutputStream =>
128+
spansToPersist.foreach { span =>
129+
val converted = convertV1SpanToV2(span)
130+
fileOutputStream.write(
131+
SpanBytesEncoder.JSON_V2
132+
.encode(converted))
133+
fileOutputStream.write('\n')
134+
}
135+
fileOutputStream.flush()
136+
}
137+
}
138+
139+
private[this] def convertV1SpanToV2(span: zipkin.Span): zipkin2.Span = {
140+
val spanBytesV1 = Codec.THRIFT.writeSpan(span)
141+
SpanBytesDecoder.THRIFT.decodeOne(spanBytesV1)
142+
}
143+
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package com.twitter.finagle.zipkin.core
2+
package unit
3+
4+
import org.scalatest.FunSuite
5+
import com.twitter.conversions.DurationOps._
6+
import com.twitter.finagle.tracing.Trace
7+
import com.twitter.finagle.util.DefaultTimer
8+
import com.twitter.util.Await
9+
import com.twitter.util.Duration
10+
import com.twitter.util.Future
11+
import com.twitter.util.FuturePool
12+
import com.twitter.util.Futures
13+
import com.twitter.util.Promise
14+
import com.twitter.util.Return
15+
import java.io.File
16+
import java.util.concurrent.CountDownLatch
17+
import java.util.concurrent.Executors
18+
import org.scalatest.concurrent.Eventually
19+
import org.scalatest.time.Seconds
20+
import org.scalatest.time.Span
21+
import scala.io.Source
22+
import zipkin2.codec.SpanBytesDecoder
23+
24+
class DurationFilteringTracerSpec extends FunSuite with Eventually {
25+
26+
implicit val config: PatienceConfig = PatienceConfig(timeout = scaled(Span(5, Seconds)))
27+
private val futurePool = FuturePool(Executors.newFixedThreadPool(100))
28+
private implicit val timer = DefaultTimer
29+
30+
test("Test only persists spans with duration greater than 100 milliseconds") {
31+
val tempFile = File.createTempFile("traces", ".json")
32+
val tracer = new DurationFilteringTracer(100.millis, 1F, tempFile.getPath)
33+
34+
Trace.letTracers(Seq(tracer)) {
35+
val future1 = Trace.traceLocalFuture("test") {
36+
taskWithDuration(10.millis)
37+
}
38+
39+
val future2 = Trace.traceLocalFuture("test2") {
40+
taskWithDuration(100.millis)
41+
}
42+
43+
Await.ready(Futures.join(future1, future2))
44+
}
45+
46+
eventually {
47+
val parsedSpans = parseSpansFromFile(tempFile.getPath)
48+
assert(parsedSpans.size == 1)
49+
assert(parsedSpans.head.name() == "test2")
50+
}
51+
}
52+
53+
test("Test persists all child spans where parent has duration greater than 100 milliseconds") {
54+
val tempFile = File.createTempFile("traces", ".json")
55+
val tracer = new DurationFilteringTracer(100.millis, 1F, tempFile.getPath)
56+
57+
Trace.letTracers(Seq(tracer)) {
58+
val future1 = Trace.traceLocalFuture("test") {
59+
taskWithDuration(10.millis)
60+
}
61+
62+
val future2 = Trace.traceLocalFuture("test2") {
63+
taskWithDuration(
64+
100.millis,
65+
childTask = Futures
66+
.join(
67+
Trace.traceLocalFuture("test3") {
68+
taskWithDuration(10.millis)
69+
},
70+
Trace.traceLocalFuture("test4") {
71+
taskWithDuration(10.millis)
72+
}).unit
73+
)
74+
}
75+
76+
Await.ready(Futures.join(future1, future2))
77+
}
78+
79+
eventually {
80+
val parsedSpans = parseSpansFromFile(tempFile.getPath)
81+
assert(parsedSpans.size == 3)
82+
assert(parsedSpans.exists(_.name() == "test2"))
83+
assert(parsedSpans.exists(_.name() == "test3"))
84+
assert(parsedSpans.exists(_.name() == "test4"))
85+
}
86+
}
87+
88+
test("Keeps limited number of spans in memory") {
89+
val tempFile = File.createTempFile("traces", ".json")
90+
val tracer =
91+
new DurationFilteringTracer(100.millis, 1F, tempFile.getPath, maxInFlightTraces = 99)
92+
93+
val latch = new CountDownLatch(100)
94+
val barrier = Promise[Unit]()
95+
96+
val tasks = Trace.letTracers(Seq(tracer)) {
97+
for (i <- 0.until(100)) yield {
98+
Trace.traceLocalFuture(s"task$i")(futurePool { // top level tasks
99+
Trace.traceLocalFuture(s"subtask$i")(Future()) // bottom level tasks
100+
latch.countDown()
101+
Await.ready(barrier)
102+
})
103+
}
104+
}
105+
106+
latch.await()
107+
Thread.sleep(100) // ensures tasks exceed 100ms threshold
108+
barrier.update(Return())
109+
Await.ready(Future.join(tasks))
110+
111+
eventually {
112+
val parsedSpans = parseSpansFromFile(tempFile.getPath)
113+
assert(parsedSpans.size < 200) // all 100 top level spans at most 99 bottom level spans
114+
assert(parsedSpans.count(_.name().contains("subtask")) < 100)
115+
}
116+
}
117+
118+
test("Only collects limited number of spans based on sample rate") {
119+
val tempFile = File.createTempFile("traces", ".json")
120+
121+
val tracer = new DurationFilteringTracer(0.millis, 0.1F, tempFile.getPath)
122+
123+
Trace.letTracers(Seq(tracer)) {
124+
for (i <- 0 until 100) {
125+
Trace.traceLocalFuture(s"task$i")(Future())
126+
}
127+
}
128+
129+
eventually {
130+
val parsedSpans = parseSpansFromFile(tempFile.getPath)
131+
assert(parsedSpans.size < 30)
132+
}
133+
}
134+
135+
private[this] def taskWithDuration(
136+
duration: Duration,
137+
childTask: => Future[Unit] = Future()
138+
): Future[Unit] = {
139+
Future.Unit.delayed(duration).flatMap(_ => childTask)
140+
}
141+
142+
private[this] def parseSpansFromFile(filename: String): Seq[zipkin2.Span] = {
143+
Source
144+
.fromFile(filename)
145+
.getLines()
146+
.map(line => SpanBytesDecoder.JSON_V2.decodeOne(line.getBytes()))
147+
}.toSeq
148+
}

0 commit comments

Comments
 (0)