Skip to content

Commit 5f33cc5

Browse files
authored
feat: Add Metrics to SharpHound to be collected - BED-7080 (#267)
* wip: stub MetricProcessor.cs * wip: cleanup MetricProcessor and Writer * chore: alter histogram value type * fix: create Dictionary Directly * wip: create platform for defining and observing metrics * wip: adding some ldap metrics to LdapUtils and LdapConnectionPool * feat: add FileMetricSink, MetricsFlushTimer, and MetricWriter. Update LabelValues to a struct * feat: refine metric logic * test: fix AdaptiveTimeout LatencyObservation, Add Tests * test: fix AdaptiveTimeout LatencyObservation, Add MetricDefinitionTests.cs and DefaultLabelValuesCacheTests.cs * test: add FileMetricSinkTests * chore: add notes for IsExternalInit.cs * tests: Add MetricAggregatorTests.cs, MetricRegistryTests.cs, and MetricRouterTests.cs * tests: adjusting to relax ranges on AdaptiveTimeoutTests * chore: coderabbit suggestions * chore: more coderabbit suggestions * chore: more coderabbit suggestions * feat: update generics for aggregator to send values * feat: add adaptive timeout metrics * chore: coderabbit suggested changes * chore: coderabbit suggested changes on FileMetricSink * chore: coderabbit suggested changes
1 parent cf999d6 commit 5f33cc5

32 files changed

Lines changed: 1581 additions & 32 deletions

src/CommonLib/AdaptiveTimeout.cs

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
using System.Threading.Tasks;
66
using Microsoft.Extensions.Logging;
77
using SharpHoundCommonLib.Exceptions;
8+
using SharpHoundCommonLib.Interfaces;
9+
using SharpHoundCommonLib.Models;
10+
using SharpHoundCommonLib.Static;
811
using SharpHoundRPC.NetAPINative;
912

1013
namespace SharpHoundCommonLib;
@@ -26,6 +29,7 @@ public sealed class AdaptiveTimeout : IDisposable {
2629
private const int ExcessiveTimeoutsThreshold = 7;
2730
private const int StdDevMultiplier = 7; // 7 standard deviations should be a very conservative upper bound
2831
private const int CountOfLatestSuccessToKeep = 3;
32+
private readonly IMetricRouter _metrics;
2933

3034
public AdaptiveTimeout(TimeSpan maxTimeout, ILogger log, int sampleCount = 100, int logFrequency = 1000, int minSamplesForAdaptiveTimeout = 30, bool useAdaptiveTimeout = true, bool throwIfExcessiveTimeouts = false) {
3135
if (maxTimeout <= TimeSpan.Zero)
@@ -47,6 +51,7 @@ public AdaptiveTimeout(TimeSpan maxTimeout, ILogger log, int sampleCount = 100,
4751
_minSamplesForAdaptiveTimeout = minSamplesForAdaptiveTimeout;
4852
_useAdaptiveTimeout = useAdaptiveTimeout;
4953
_throwIfExcessiveTimeouts = throwIfExcessiveTimeouts;
54+
_metrics = Metrics.Factory.CreateMetricRouter();
5055
}
5156

5257
public AdaptiveTimeout(TimeSpan maxTimeout, TimeSpan minTimeout, ILogger log, int sampleCount = 100, int logFrequency = 1000, int minSamplesForAdaptiveTimeout = 30, bool useAdaptiveTimeout = true, bool throwIfExcessiveTimeouts = false)
@@ -75,15 +80,19 @@ public void ClearSamples() {
7580
/// <typeparam name="T"></typeparam>
7681
/// <param name="func"></param>
7782
/// <param name="parentToken"></param>
83+
/// <param name="latencyObservation">Optional callback that receives the measured execution time of <paramref name="func"/>, in milliseconds.</param>
7884
/// <returns>Returns a Fail result if a task runs longer than its budgeted time.</returns>
79-
public async Task<Result<T>> ExecuteWithTimeout<T>(Func<CancellationToken, T> func, CancellationToken parentToken = default) {
85+
public async Task<Result<T>> ExecuteWithTimeout<T>(Func<CancellationToken, T> func, CancellationToken parentToken = default, Action<double> latencyObservation = null) {
8086
DateTime startTime = default;
8187
// Run func with GetAdaptiveTimeout() as the time budget. The sampler times the call and,
// when latencyObservation is supplied, hands it the elapsed milliseconds.
var result = await Timeout.ExecuteWithTimeout(GetAdaptiveTimeout(), (timeoutToken) =>
8288
_sampler.SampleExecutionTime(() => {
8389
startTime = DateTime.Now; // for ordinal tracking; see use in TimeSpikeSafetyValve
8490
return func(timeoutToken);
85-
}), parentToken);
91+
}, latencyObservation), parentToken);
8692
TimeSpikeSafetyValve(result.IsSuccess, startTime);
93+
// Every non-success result adds 1 to the TimeoutsTotal metric, with empty label values.
// NOTE(review): presumably a failed result here always means a timeout — confirm against Timeout.ExecuteWithTimeout.
if (!result.IsSuccess) {
94+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
95+
}
8796
return result;
8897
}
8998

@@ -97,15 +106,19 @@ public async Task<Result<T>> ExecuteWithTimeout<T>(Func<CancellationToken, T> fu
97106
/// </summary>
98107
/// <param name="func"></param>
99108
/// <param name="parentToken"></param>
109+
/// <param name="latencyObservation">Optional callback that receives the measured execution time of <paramref name="func"/>, in milliseconds.</param>
100110
/// <returns>Returns a Fail result if a task runs longer than its budgeted time.</returns>
101-
public async Task<Result> ExecuteWithTimeout(Action<CancellationToken> func, CancellationToken parentToken = default) {
111+
public async Task<Result> ExecuteWithTimeout(Action<CancellationToken> func, CancellationToken parentToken = default, Action<double> latencyObservation = null) {
102112
DateTime startTime = default;
103113
// Run func with GetAdaptiveTimeout() as the time budget. The sampler times the call and,
// when latencyObservation is supplied, hands it the elapsed milliseconds.
var result = await Timeout.ExecuteWithTimeout(GetAdaptiveTimeout(), (timeoutToken) =>
104114
_sampler.SampleExecutionTime(() => {
105115
startTime = DateTime.Now; // for ordinal tracking; see use in TimeSpikeSafetyValve
106116
func(timeoutToken);
107-
}), parentToken);
117+
}, latencyObservation), parentToken);
108118
TimeSpikeSafetyValve(result.IsSuccess, startTime);
119+
// Every non-success result adds 1 to the TimeoutsTotal metric, with empty label values.
// NOTE(review): presumably a failed result here always means a timeout — confirm against Timeout.ExecuteWithTimeout.
if (!result.IsSuccess) {
120+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
121+
}
109122
return result;
110123
}
111124

@@ -120,15 +133,19 @@ public async Task<Result> ExecuteWithTimeout(Action<CancellationToken> func, Can
120133
/// <typeparam name="T"></typeparam>
121134
/// <param name="func"></param>
122135
/// <param name="parentToken"></param>
136+
/// <param name="latencyObservation">Optional callback that receives the measured execution time of <paramref name="func"/>, in milliseconds.</param>
123137
/// <returns>Returns a Fail result if a task runs longer than its budgeted time.</returns>
124-
public async Task<Result<T>> ExecuteWithTimeout<T>(Func<CancellationToken, Task<T>> func, CancellationToken parentToken = default) {
138+
public async Task<Result<T>> ExecuteWithTimeout<T>(Func<CancellationToken, Task<T>> func, CancellationToken parentToken = default, Action<double> latencyObservation = null) {
125139
DateTime startTime = default;
126140
// Run func with GetAdaptiveTimeout() as the time budget. The sampler awaits and times the call and,
// when latencyObservation is supplied, hands it the elapsed milliseconds.
var result = await Timeout.ExecuteWithTimeout(GetAdaptiveTimeout(), (timeoutToken) =>
127141
_sampler.SampleExecutionTime(() => {
128142
startTime = DateTime.Now; // for ordinal tracking; see use in TimeSpikeSafetyValve
129143
return func(timeoutToken);
130-
}), parentToken);
144+
}, latencyObservation), parentToken);
131145
TimeSpikeSafetyValve(result.IsSuccess, startTime);
146+
// Every non-success result adds 1 to the TimeoutsTotal metric, with empty label values.
// NOTE(review): presumably a failed result here always means a timeout — confirm against Timeout.ExecuteWithTimeout.
if (!result.IsSuccess) {
147+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
148+
}
132149
return result;
133150
}
134151

@@ -142,15 +159,19 @@ public async Task<Result<T>> ExecuteWithTimeout<T>(Func<CancellationToken, Task<
142159
/// </summary>
143160
/// <param name="func"></param>
144161
/// <param name="parentToken"></param>
162+
/// <param name="latencyObservation">Optional callback that receives the measured execution time of <paramref name="func"/>, in milliseconds.</param>
145163
/// <returns>Returns a Fail result if a task runs longer than its budgeted time.</returns>
146-
public async Task<Result> ExecuteWithTimeout(Func<CancellationToken, Task> func, CancellationToken parentToken = default) {
164+
public async Task<Result> ExecuteWithTimeout(Func<CancellationToken, Task> func, CancellationToken parentToken = default, Action<double> latencyObservation = null) {
147165
DateTime startTime = default;
148166
// Run func with GetAdaptiveTimeout() as the time budget. The sampler awaits and times the call and,
// when latencyObservation is supplied, hands it the elapsed milliseconds.
var result = await Timeout.ExecuteWithTimeout(GetAdaptiveTimeout(), (timeoutToken) =>
149167
_sampler.SampleExecutionTime(() => {
150168
startTime = DateTime.Now; // for ordinal tracking; see use in TimeSpikeSafetyValve
151169
return func(timeoutToken);
152-
}), parentToken);
170+
}, latencyObservation), parentToken);
153171
TimeSpikeSafetyValve(result.IsSuccess, startTime);
172+
// Every non-success result adds 1 to the TimeoutsTotal metric, with empty label values.
// NOTE(review): presumably a failed result here always means a timeout — confirm against Timeout.ExecuteWithTimeout.
if (!result.IsSuccess) {
173+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
174+
}
154175
return result;
155176
}
156177

@@ -174,6 +195,9 @@ public async Task<NetAPIResult<T>> ExecuteNetAPIWithTimeout<T>(Func<Cancellation
174195
return func(timeoutToken);
175196
}), parentToken);
176197
TimeSpikeSafetyValve(result.IsSuccess, startTime);
198+
if (!result.IsSuccess) {
199+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
200+
}
177201
return result;
178202
}
179203

@@ -197,6 +221,9 @@ public async Task<NetAPIResult<T>> ExecuteNetAPIWithTimeout<T>(Func<Cancellation
197221
return func(timeoutToken);
198222
}), parentToken);
199223
TimeSpikeSafetyValve(result.IsSuccess, startTime);
224+
if (!result.IsSuccess) {
225+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
226+
}
200227
return result;
201228
}
202229

@@ -220,6 +247,9 @@ public async Task<NetAPIResult<T>> ExecuteNetAPIWithTimeout<T>(Func<Cancellation
220247
return func(timeoutToken);
221248
}), parentToken);
222249
TimeSpikeSafetyValve(result.IsSuccess, startTime);
250+
if (!result.IsSuccess) {
251+
_metrics.Observe(AdaptiveTimeoutDefinitions.TimeoutsTotal, 1, new LabelValues());
252+
}
223253
return result;
224254
}
225255

src/CommonLib/ExecutionTimeSampler.cs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,35 +43,39 @@ public double StandardDeviation() {
4343

4444
public double Average() => _samples.Average();
4545

46-
public async Task<T> SampleExecutionTime<T>(Func<Task<T>> func) {
46+
/// <summary>
/// Awaits <paramref name="func"/>, measures how long it took, and records the duration as a time sample.
/// </summary>
/// <typeparam name="T">Type of the value produced by <paramref name="func"/>.</typeparam>
/// <param name="func">The asynchronous operation to time.</param>
/// <param name="latencyObservation">Optional callback that receives the elapsed time in milliseconds.</param>
/// <returns>The value produced by <paramref name="func"/>.</returns>
public async Task<T> SampleExecutionTime<T>(Func<Task<T>> func, Action<double> latencyObservation = null) {
    var stopwatch = Stopwatch.StartNew();
    var result = await func.Invoke();
    stopwatch.Stop();

    // Elapsed.TotalMilliseconds keeps the fractional part. ElapsedMilliseconds is a whole-number
    // long and would report 0 for any call that completes in under a millisecond.
    latencyObservation?.Invoke(stopwatch.Elapsed.TotalMilliseconds);
    AddTimeSample(stopwatch.Elapsed);

    return result;
}
5455

55-
public async Task SampleExecutionTime(Func<Task> func) {
56+
/// <summary>
/// Awaits <paramref name="func"/>, measures how long it took, and records the duration as a time sample.
/// </summary>
/// <param name="func">The asynchronous operation to time.</param>
/// <param name="latencyObservation">Optional callback that receives the elapsed time in milliseconds.</param>
public async Task SampleExecutionTime(Func<Task> func, Action<double> latencyObservation = null) {
    var stopwatch = Stopwatch.StartNew();
    await func.Invoke();
    stopwatch.Stop();

    // Elapsed.TotalMilliseconds keeps the fractional part. ElapsedMilliseconds is a whole-number
    // long and would report 0 for any call that completes in under a millisecond.
    latencyObservation?.Invoke(stopwatch.Elapsed.TotalMilliseconds);
    AddTimeSample(stopwatch.Elapsed);
}
6163

62-
public T SampleExecutionTime<T>(Func<T> func) {
64+
/// <summary>
/// Invokes <paramref name="func"/> synchronously, measures how long it took, and records the duration as a time sample.
/// </summary>
/// <typeparam name="T">Type of the value produced by <paramref name="func"/>.</typeparam>
/// <param name="func">The operation to time.</param>
/// <param name="latencyObservation">Optional callback that receives the elapsed time in milliseconds.</param>
/// <returns>The value produced by <paramref name="func"/>.</returns>
public T SampleExecutionTime<T>(Func<T> func, Action<double> latencyObservation = null) {
    var stopwatch = Stopwatch.StartNew();
    var result = func.Invoke();
    stopwatch.Stop();

    // Elapsed.TotalMilliseconds keeps the fractional part. ElapsedMilliseconds is a whole-number
    // long and would report 0 for any call that completes in under a millisecond.
    latencyObservation?.Invoke(stopwatch.Elapsed.TotalMilliseconds);
    AddTimeSample(stopwatch.Elapsed);

    return result;
}
7073

71-
public void SampleExecutionTime(Action func) {
74+
/// <summary>
/// Invokes <paramref name="func"/> synchronously, measures how long it took, and records the duration as a time sample.
/// </summary>
/// <param name="func">The operation to time.</param>
/// <param name="latencyObservation">Optional callback that receives the elapsed time in milliseconds.</param>
public void SampleExecutionTime(Action func, Action<double> latencyObservation = null) {
    var stopwatch = Stopwatch.StartNew();
    func.Invoke();
    stopwatch.Stop();

    // Elapsed.TotalMilliseconds keeps the fractional part. ElapsedMilliseconds is a whole-number
    // long and would report 0 for any call that completes in under a millisecond.
    latencyObservation?.Invoke(stopwatch.Elapsed.TotalMilliseconds);
    AddTimeSample(stopwatch.Elapsed);
}
7781

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
namespace SharpHoundCommonLib.Interfaces;
2+
3+
/// <summary>
/// Contract for a cache of label-value arrays.
/// </summary>
public interface ILabelValuesCache {
4+
/// <summary>
/// Takes an array of label values and returns an array of label values.
/// NOTE(review): presumably returns a shared, canonical instance for equal inputs (as the name suggests) — confirm against the implementation.
/// </summary>
/// <param name="values">The label values to look up.</param>
/// <returns>The label-value array to use in place of <paramref name="values"/>.</returns>
string[] Intern(string[] values);
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
namespace SharpHoundCommonLib.Interfaces;
2+
3+
/// <summary>
/// Factory that hands out <see cref="IMetricRouter"/> instances.
/// </summary>
public interface IMetricFactory {
4+
/// <summary>
/// Provides an <see cref="IMetricRouter"/> through which metric observations can be recorded.
/// NOTE(review): whether each call returns a new router or a shared one is not visible here — confirm against the implementation.
/// </summary>
/// <returns>The metric router.</returns>
IMetricRouter CreateMetricRouter();
5+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
using System.Collections.Generic;
2+
using SharpHoundCommonLib.Models;
3+
4+
namespace SharpHoundCommonLib.Interfaces;
5+
6+
/// <summary>
/// Registry of <see cref="MetricDefinition"/> entries, each addressable by an integer id.
/// </summary>
public interface IMetricRegistry {
7+
/// <summary>
/// Attempts to register <paramref name="definition"/> and reports its id through <paramref name="definitionId"/>.
/// NOTE(review): the conditions under which this returns false are not visible here — confirm against the implementation.
/// </summary>
/// <param name="definition">The metric definition to register.</param>
/// <param name="definitionId">Receives the id associated with the definition.</param>
/// <returns>Whether the registration attempt succeeded.</returns>
bool TryRegister(MetricDefinition definition, out int definitionId);
8+
/// <summary>
/// Read-only list of metric definitions held by the registry.
/// </summary>
IReadOnlyList<MetricDefinition> Definitions { get; }
9+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
using SharpHoundCommonLib.Models;
2+
3+
namespace SharpHoundCommonLib.Interfaces;
4+
5+
/// <summary>
/// Entry point through which callers record metric observations.
/// </summary>
public interface IMetricRouter {
6+
/// <summary>
/// Records <paramref name="value"/> for the metric identified by <paramref name="definitionId"/>.
/// </summary>
/// <param name="definitionId">Id of the metric definition being observed.</param>
/// <param name="value">The observed value.</param>
/// <param name="labelValues">Label values attached to this observation.</param>
void Observe(int definitionId, double value, LabelValues labelValues);
7+
/// <summary>
/// Flushes the router.
/// NOTE(review): presumably pushes pending observations to the underlying sinks — confirm against the implementation.
/// </summary>
void Flush();
8+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
using SharpHoundCommonLib.Models;
2+
3+
namespace SharpHoundCommonLib.Interfaces;
4+
5+
/// <summary>
/// Destination that receives metric observations.
/// </summary>
public interface IMetricSink {
6+
/// <summary>
/// Receives a single metric observation; the argument is passed by read-only reference.
/// </summary>
/// <param name="observation">The observation to record.</param>
void Observe(in MetricObservation.DoubleMetricObservation observation);
7+
/// <summary>
/// Flushes the sink.
/// NOTE(review): presumably writes out whatever has been accumulated — confirm against the implementation.
/// </summary>
void Flush();
8+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
using System;
2+
using System.Text;
3+
using SharpHoundCommonLib.Models;
4+
using SharpHoundCommonLib.Services;
5+
6+
namespace SharpHoundCommonLib.Interfaces;
7+
8+
/// <summary>
/// Formats a metric into text by appending to a <see cref="StringBuilder"/>.
/// </summary>
public interface IMetricWriter {
9+
/// <summary>
/// Appends a textual representation of one metric to <paramref name="builder"/>.
/// </summary>
/// <param name="builder">The builder that receives the output.</param>
/// <param name="definition">Definition of the metric being written.</param>
/// <param name="labelValues">Label values associated with the metric.</param>
/// <param name="aggregator">Aggregator associated with the metric. NOTE(review): presumably holds the aggregated value(s) to write — confirm.</param>
/// <param name="timestamp">Timestamp associated with the written metric.</param>
/// <param name="timestampOutputString">Format string for the timestamp; defaults to "yyyy-MM-dd HH:mm:ss.fff".</param>
void StringBuilderAppendMetric(
10+
StringBuilder builder,
11+
MetricDefinition definition,
12+
LabelValues labelValues,
13+
MetricAggregator aggregator,
14+
DateTimeOffset timestamp,
15+
string timestampOutputString = "yyyy-MM-dd HH:mm:ss.fff"
16+
);
17+
}

0 commit comments

Comments
 (0)