Skip to content

Commit ef9581e

Browse files
authored
4520 – Add metrics to code and create Honeycomb status dashboard: parser requests [WIP] (#458)
We are now sending metrics related to parsing requests. We are keeping track of: service_name, parser_name, parsed_host and parsing_status.

What we want to answer:
- total of parsing requests per x amount of time
- total of parsing requests per x amount of time, grouped by partner [TBD: not possible to do right now]
- total of parsing requests per x amount of time, grouped by parser
- total of parsing requests for a unique host that does not have a dedicated parser, per x amount of time
- total of errors and total of successes of our parsing requests per x amount of time

Why we are sending the data we are sending: I ended up splitting the metrics so we can get both more granular data and more generic data. I did this because it was proving difficult to get the information we wanted on the Honeycomb side. For some cases we wanted a SUM of the value sent by the metric, but I think that, since the otel-collector sends the scraped data every 15 seconds, we end up with a bunch of events, and the SUM doesn't really show what we need. We can zoom in on specific points in time, and I have set the default granularity to match the otel-collector. All of that helps, but doesn't get us 100% of what we would like, so I ended up using MAX instead of SUM. But that was an issue when we had more granular metrics and wanted to know, for example, the number of parsing requests to the page parser without breaking it up by host: if we made 2 parsing requests to page but to different hosts, those would be split, and we would get a MAX of 1 instead of 2.

References: 4396, 4520
PR: 458
1 parent f1d6a96 commit ef9581e

File tree

5 files changed

+31
-36
lines changed

5 files changed

+31
-36
lines changed

app/models/media.rb

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def as_json(options = {})
8686
end
8787
archive_if_conditions_are_met(options, id, cache)
8888
Metrics.schedule_fetching_metrics_from_facebook(self.data, self.url, ApiKey.current&.id)
89-
MetricsService.increment_counter(:media_request_total, labels: { service: 'pender', parser: self.data['provider'] })
89+
parser_requests_metrics
9090
cache.read(id, :json) || cleanup_data_encoding(data)
9191
end
9292

@@ -326,4 +326,20 @@ def archive_if_conditions_are_met(options, id, cache)
326326
self.archive(options.delete(:archivers))
327327
end
328328
end
329+
330+
def parser_requests_metrics
331+
url = RequestHelper.normalize_url(self.url)
332+
333+
MetricsService.increment_counter(:pender_parser_requests_total)
334+
MetricsService.increment_counter(:pender_parser_requests_per_parser, labels: { parser_name: data[:provider], parsing_status: data[:error].nil? ? 'success' : 'error' })
335+
MetricsService.increment_counter(:pender_parser_requests, labels: { parser_name: data[:provider], parsed_host: URI(url).host, parsing_status: data[:error].nil? ? 'success' : 'error' })
336+
if data[:error].nil?
337+
MetricsService.increment_counter(:pender_parser_requests_success)
338+
MetricsService.increment_counter(:pender_parser_requests_success_per_parser, labels: { parser_name: data[:provider] })
339+
else
340+
MetricsService.increment_counter(:pender_parser_requests_error)
341+
MetricsService.increment_counter(:pender_parser_requests_error_per_parser, labels: { parser_name: data[:provider] })
342+
end
343+
end
344+
329345
end
Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
Rails.application.reloader.to_prepare do
2-
MetricsService.custom_counter(:media_request_total, 'Count every request made', labels: [:service, :parser])
2+
MetricsService.custom_counter(:pender_parser_requests_total, 'Count parsing requests, gets the full total - does not break them by labels', labels: [:service_name])
3+
MetricsService.custom_counter(:pender_parser_requests_success, 'Count successful parsing requests', labels: [:service_name])
4+
MetricsService.custom_counter(:pender_parser_requests_error, 'Count errored parsing requests', labels: [:service_name])
5+
6+
MetricsService.custom_counter(:pender_parser_requests_per_parser, 'Count parsing requests per parser', labels: [:service_name, :parser_name, :parsing_status])
7+
MetricsService.custom_counter(:pender_parser_requests_success_per_parser, 'Count successful parsing requests per parser', labels: [:service_name, :parser_name])
8+
MetricsService.custom_counter(:pender_parser_requests_error_per_parser, 'Count errored parsing requests per parser', labels: [:service_name, :parser_name])
9+
10+
MetricsService.custom_counter(:pender_parser_requests, 'Count parsing requests - broken by labels', labels: [:service_name, :parser_name, :parsed_host, :parsing_status])
311
end

lib/metrics_service.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ def custom_counter(name, description, labels: [])
66
counter = Prometheus::Client::Counter.new(
77
name,
88
docstring: description,
9-
labels: labels
9+
labels: labels,
10+
preset_labels: { service_name: 'pender' }
1011
)
1112
prometheus_registry.register(counter)
1213
end

test/integration/parsers/page_item_test.rb

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,24 +26,6 @@ class PageItemIntegrationTest < ActiveSupport::TestCase
2626
assert_equal '', data['username']
2727
end
2828

29-
test "should parse url with arabic or already encoded chars" do
30-
urls = [
31-
'https://www.aljazeera.net/news/2023/2/9/الشرطة-السويدية-ترفض-منح-إذن-لحرق',
32-
'https://www.aljazeera.net/news/2023/2/9/%D8%A7%D9%84%D8%B4%D8%B1%D8%B7%D8%A9-%D8%A7%D9%84%D8%B3%D9%88%D9%8A%D8%AF%D9%8A%D8%A9-%D8%AA%D8%B1%D9%81%D8%B6-%D9%85%D9%86%D8%AD-%D8%A5%D8%B0%D9%86-%D9%84%D8%AD%D8%B1%D9%82'
33-
]
34-
urls.each do |url|
35-
m = create_media url: url
36-
data = m.as_json
37-
assert_equal 'الشرطة السويدية ترفض منح إذن جديد لحرق المصحف الشريف أمام السفارة التركية.. فما السبب؟', data['title']
38-
assert_equal 'رفضت الشرطة السويدية منح إذن لحرق المصحف الشريف أمام السفارة التركية، قائلة إن ذلك من شأنه “إثارة اضطرابات خطيرة للأمن القومي”.', data['description']
39-
assert_equal '', data['published_at']
40-
assert_equal '', data['username']
41-
assert_match /^https?:\/\/www\.aljazeera\.net$/, data['author_url']
42-
assert_nil data['error']
43-
assert_not_nil data['picture']
44-
end
45-
end
46-
4729
test "should store metatags in an Array" do
4830
m = create_media url: 'https://www.nytimes.com/2017/06/14/us/politics/mueller-trump-special-counsel-investigation.html'
4931
data = m.as_json
@@ -105,18 +87,6 @@ class PageItemIntegrationTest < ActiveSupport::TestCase
10587
assert !data['picture'].blank?
10688
end
10789

108-
test "should parse urls without utf encoding" do
109-
urls = [
110-
'https://www.aljazeera.net/news/2024/1/24/شهيد-بالضفة-والاحتلال-يعتقل-طفلا-حرر',
111-
'https://www.aljazeera.net/news/2024/1/24/%D8%B4%D9%87%D9%8A%D8%AF-%D8%A8%D8%A7%D9%84%D8%B6%D9%81%D8%A9-%D9%88%D8%A7%D9%84%D8%A7%D8%AD%D8%AA%D9%84%D8%A7%D9%84-%D9%8A%D8%B9%D8%AA%D9%82%D9%84-%D8%B7%D9%81%D9%84%D8%A7-%D8%AD%D8%B1%D8%B1',
112-
]
113-
urls.each do |url|
114-
m = create_media url: url
115-
data = m.as_json
116-
assert data['error'].nil?
117-
end
118-
end
119-
12090
test "should use original url when redirected page requires cookie" do
12191
RequestHelper.stubs(:get_html).returns(Nokogiri::HTML("<meta property='og:url' content='https://www.tandfonline.com/action/cookieAbsent'><meta name='pbContext' content=';wgroup:string:Publication Websites;website:website:TFOPB;page:string:Cookie Absent'>"))
12292
url = 'https://doi.org/10.1080/10584609.2019.1619639'

test/lib/metrics_service_test.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33
class MetricsServiceTest < ActiveSupport::TestCase
44
test "custom_counter works for real" do
55
assert_nothing_raised do
6-
MetricsService.custom_counter(:custom_counter, 'Custom counter test', labels: [:test])
6+
MetricsService.custom_counter(:custom_counter, 'Custom counter test', labels: [:service_name, :test])
77
end
88
end
99

1010
test "increment_counter works for real" do
11-
MetricsService.custom_counter(:custom_counter_2, 'Custom counter test', labels: [:test])
11+
MetricsService.custom_counter(:custom_counter_2, 'Custom counter test', labels: [:service_name, :test])
1212

1313
assert_nothing_raised do
1414
MetricsService.increment_counter(:custom_counter_2, labels: [:test])
1515
end
1616
end
1717

1818
test "get_counter works for real" do
19-
custom_counter = MetricsService.custom_counter(:custom_counter_3, 'Custom counter test', labels: [:test])
19+
custom_counter = MetricsService.custom_counter(:custom_counter_3, 'Custom counter test', labels: [:service_name, :test])
2020

2121
assert_nothing_raised do
2222
MetricsService.get_counter(custom_counter, labels: [:test])

0 commit comments

Comments
 (0)