Skip to content

Commit 1138749

Browse files
committed
Merge branch 'develop'
2 parents 91b10ab + aa7d993 commit 1138749

19 files changed

+160
-86
lines changed

CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
* @caiosba @melsawy @vasconsaurus
1+
* @caiosba @melsawy @vasconsaurus @jayjay-w

Gemfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ gem 'rack-cors', '>= 2.0.2', :require => 'rack/cors'
4747
gem 'rails-perftest'
4848
gem 'sidekiq', '< 8'
4949
gem 'redis', '4.3.1'
50-
gem 'nokogiri', '1.16.2', require: false
50+
gem 'nokogiri', '1.16.5', require: false
5151
gem 'htmlentities', require: false
5252
gem 'rack-protection', '2.0.1'
5353
gem 'loofah', '2.19.1', require: false

Gemfile.lock

Lines changed: 66 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -21,60 +21,60 @@ GIT
2121
GEM
2222
remote: https://rubygems.org/
2323
specs:
24-
actioncable (6.1.7.7)
25-
actionpack (= 6.1.7.7)
26-
activesupport (= 6.1.7.7)
24+
actioncable (6.1.7.8)
25+
actionpack (= 6.1.7.8)
26+
activesupport (= 6.1.7.8)
2727
nio4r (~> 2.0)
2828
websocket-driver (>= 0.6.1)
29-
actionmailbox (6.1.7.7)
30-
actionpack (= 6.1.7.7)
31-
activejob (= 6.1.7.7)
32-
activerecord (= 6.1.7.7)
33-
activestorage (= 6.1.7.7)
34-
activesupport (= 6.1.7.7)
29+
actionmailbox (6.1.7.8)
30+
actionpack (= 6.1.7.8)
31+
activejob (= 6.1.7.8)
32+
activerecord (= 6.1.7.8)
33+
activestorage (= 6.1.7.8)
34+
activesupport (= 6.1.7.8)
3535
mail (>= 2.7.1)
36-
actionmailer (6.1.7.7)
37-
actionpack (= 6.1.7.7)
38-
actionview (= 6.1.7.7)
39-
activejob (= 6.1.7.7)
40-
activesupport (= 6.1.7.7)
36+
actionmailer (6.1.7.8)
37+
actionpack (= 6.1.7.8)
38+
actionview (= 6.1.7.8)
39+
activejob (= 6.1.7.8)
40+
activesupport (= 6.1.7.8)
4141
mail (~> 2.5, >= 2.5.4)
4242
rails-dom-testing (~> 2.0)
43-
actionpack (6.1.7.7)
44-
actionview (= 6.1.7.7)
45-
activesupport (= 6.1.7.7)
43+
actionpack (6.1.7.8)
44+
actionview (= 6.1.7.8)
45+
activesupport (= 6.1.7.8)
4646
rack (~> 2.0, >= 2.0.9)
4747
rack-test (>= 0.6.3)
4848
rails-dom-testing (~> 2.0)
4949
rails-html-sanitizer (~> 1.0, >= 1.2.0)
50-
actiontext (6.1.7.7)
51-
actionpack (= 6.1.7.7)
52-
activerecord (= 6.1.7.7)
53-
activestorage (= 6.1.7.7)
54-
activesupport (= 6.1.7.7)
50+
actiontext (6.1.7.8)
51+
actionpack (= 6.1.7.8)
52+
activerecord (= 6.1.7.8)
53+
activestorage (= 6.1.7.8)
54+
activesupport (= 6.1.7.8)
5555
nokogiri (>= 1.8.5)
56-
actionview (6.1.7.7)
57-
activesupport (= 6.1.7.7)
56+
actionview (6.1.7.8)
57+
activesupport (= 6.1.7.8)
5858
builder (~> 3.1)
5959
erubi (~> 1.4)
6060
rails-dom-testing (~> 2.0)
6161
rails-html-sanitizer (~> 1.1, >= 1.2.0)
62-
activejob (6.1.7.7)
63-
activesupport (= 6.1.7.7)
62+
activejob (6.1.7.8)
63+
activesupport (= 6.1.7.8)
6464
globalid (>= 0.3.6)
65-
activemodel (6.1.7.7)
66-
activesupport (= 6.1.7.7)
67-
activerecord (6.1.7.7)
68-
activemodel (= 6.1.7.7)
69-
activesupport (= 6.1.7.7)
70-
activestorage (6.1.7.7)
71-
actionpack (= 6.1.7.7)
72-
activejob (= 6.1.7.7)
73-
activerecord (= 6.1.7.7)
74-
activesupport (= 6.1.7.7)
65+
activemodel (6.1.7.8)
66+
activesupport (= 6.1.7.8)
67+
activerecord (6.1.7.8)
68+
activemodel (= 6.1.7.8)
69+
activesupport (= 6.1.7.8)
70+
activestorage (6.1.7.8)
71+
actionpack (= 6.1.7.8)
72+
activejob (= 6.1.7.8)
73+
activerecord (= 6.1.7.8)
74+
activesupport (= 6.1.7.8)
7575
marcel (~> 1.0)
7676
mini_mime (>= 1.1.0)
77-
activesupport (6.1.7.7)
77+
activesupport (6.1.7.8)
7878
concurrent-ruby (~> 1.0, >= 1.0.2)
7979
i18n (>= 1.6, < 2)
8080
minitest (>= 5.1)
@@ -106,7 +106,7 @@ GEM
106106
byebug (11.1.3)
107107
codeclimate-test-reporter (1.0.8)
108108
simplecov (<= 0.13)
109-
concurrent-ruby (1.2.3)
109+
concurrent-ruby (1.3.1)
110110
connection_pool (2.4.1)
111111
crack (0.4.5)
112112
rexml
@@ -143,7 +143,7 @@ GEM
143143
heapy (0.2.0)
144144
thor
145145
htmlentities (4.3.4)
146-
i18n (1.14.1)
146+
i18n (1.14.5)
147147
concurrent-ruby (~> 1.0)
148148
jmespath (1.6.2)
149149
json (2.6.3)
@@ -170,7 +170,7 @@ GEM
170170
method_source (1.0.0)
171171
mini_histogram (0.3.1)
172172
mini_mime (1.1.5)
173-
mini_portile2 (2.8.5)
173+
mini_portile2 (2.8.7)
174174
minitest (5.10.1)
175175
minitest-retry (0.2.2)
176176
minitest (>= 5.0)
@@ -187,7 +187,7 @@ GEM
187187
net-smtp (0.4.0.1)
188188
net-protocol
189189
nio4r (2.7.0)
190-
nokogiri (1.16.2)
190+
nokogiri (1.16.5)
191191
mini_portile2 (~> 2.8.2)
192192
racc (~> 1.4)
193193
open_uri_redirections (0.2.1)
@@ -270,7 +270,7 @@ GEM
270270
public_suffix (4.0.7)
271271
puma (5.6.8)
272272
nio4r (~> 2.0)
273-
racc (1.7.3)
273+
racc (1.8.0)
274274
rack (2.2.8.1)
275275
rack-cors (2.0.2)
276276
rack (>= 2.0.0)
@@ -279,20 +279,20 @@ GEM
279279
rack-test (2.1.0)
280280
rack (>= 1.3)
281281
railroady (1.6.0)
282-
rails (6.1.7.7)
283-
actioncable (= 6.1.7.7)
284-
actionmailbox (= 6.1.7.7)
285-
actionmailer (= 6.1.7.7)
286-
actionpack (= 6.1.7.7)
287-
actiontext (= 6.1.7.7)
288-
actionview (= 6.1.7.7)
289-
activejob (= 6.1.7.7)
290-
activemodel (= 6.1.7.7)
291-
activerecord (= 6.1.7.7)
292-
activestorage (= 6.1.7.7)
293-
activesupport (= 6.1.7.7)
282+
rails (6.1.7.8)
283+
actioncable (= 6.1.7.8)
284+
actionmailbox (= 6.1.7.8)
285+
actionmailer (= 6.1.7.8)
286+
actionpack (= 6.1.7.8)
287+
actiontext (= 6.1.7.8)
288+
actionview (= 6.1.7.8)
289+
activejob (= 6.1.7.8)
290+
activemodel (= 6.1.7.8)
291+
activerecord (= 6.1.7.8)
292+
activestorage (= 6.1.7.8)
293+
activesupport (= 6.1.7.8)
294294
bundler (>= 1.15.0)
295-
railties (= 6.1.7.7)
295+
railties (= 6.1.7.8)
296296
sprockets-rails (>= 2.0.0)
297297
rails-controller-testing (1.0.5)
298298
actionpack (>= 5.0.1.rc1)
@@ -305,23 +305,24 @@ GEM
305305
rails-html-sanitizer (1.4.4)
306306
loofah (~> 2.19, >= 2.19.1)
307307
rails-perftest (0.0.7)
308-
railties (6.1.7.7)
309-
actionpack (= 6.1.7.7)
310-
activesupport (= 6.1.7.7)
308+
railties (6.1.7.8)
309+
actionpack (= 6.1.7.8)
310+
activesupport (= 6.1.7.8)
311311
method_source
312312
rake (>= 12.2)
313313
thor (~> 1.0)
314314
rake (13.1.0)
315315
redis (4.3.1)
316-
redis-client (0.18.0)
316+
redis-client (0.22.1)
317317
connection_pool
318318
request_store (1.5.1)
319319
rack (>= 1.4)
320320
responders (3.1.0)
321321
actionpack (>= 5.2)
322322
railties (>= 5.2)
323323
retryable (3.0.5)
324-
rexml (3.2.5)
324+
rexml (3.2.8)
325+
strscan (>= 3.0.9)
325326
rspec-core (3.12.2)
326327
rspec-support (~> 3.12.0)
327328
rspec-expectations (3.12.3)
@@ -364,11 +365,11 @@ GEM
364365
sentry-sidekiq (5.10.0)
365366
sentry-ruby (~> 5.10.0)
366367
sidekiq (>= 3.0)
367-
sidekiq (7.2.0)
368+
sidekiq (7.2.4)
368369
concurrent-ruby (< 2)
369370
connection_pool (>= 2.3.0)
370371
rack (>= 2.2.4)
371-
redis-client (>= 0.14.0)
372+
redis-client (>= 0.19.0)
372373
simplecov (0.13.0)
373374
docile (~> 1.1.0)
374375
json (>= 1.8, < 3)
@@ -386,6 +387,7 @@ GEM
386387
actionpack (>= 5.2)
387388
activesupport (>= 5.2)
388389
sprockets (>= 3.0.0)
390+
strscan (3.1.0)
389391
terminal-table (3.0.2)
390392
unicode-display_width (>= 1.1.1, < 3)
391393
thor (1.3.1)
@@ -409,7 +411,7 @@ GEM
409411
websocket-extensions (0.1.5)
410412
yt (0.25.40)
411413
activesupport
412-
zeitwerk (2.6.13)
414+
zeitwerk (2.6.15)
413415

414416
PLATFORMS
415417
ruby
@@ -432,7 +434,7 @@ DEPENDENCIES
432434
minitest-retry
433435
mocha (~> 1.14.0)
434436
net-http
435-
nokogiri (= 1.16.2)
437+
nokogiri (= 1.16.5)
436438
open_uri_redirections
437439
opentelemetry-exporter-otlp
438440
opentelemetry-instrumentation-action_pack

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,16 @@ To enable sampling for Honeycomb, set the following configuration (either in `co
454454

455455
**Note**: If sampling behavior is changed in Pender, we will also need to update the behavior to match in any other application reporting to Honeycomb. More [here](https://docs.honeycomb.io/getting-data-in/opentelemetry/ruby/#sampling)
456456

457+
### URL Parameters Normalization
458+
459+
Some service providers include URL parameters for tracking purposes that can be safely removed. Pender parsers can define a list of such parameters to be removed during the URL normalization process.
460+
461+
To define URL parameters to be removed, a parser class should implement the `urls_parameters_to_remove` method, which returns an array of strings representing the parameters to be stripped. For example:
462+
463+
```ruby
464+
def urls_parameters_to_remove
465+
['igsh']
466+
end
```
457467

458468
#### Environment overrides
459469

app/models/concerns/provider_twitter.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def oembed_url(_ = nil)
1414
def tweet_lookup(tweet_id)
1515
params = {
1616
"ids": tweet_id,
17-
"tweet.fields": "author_id,created_at,text",
17+
"tweet.fields": "author_id,created_at,text,lang",
1818
"expansions": "author_id,attachments.media_keys",
1919
"user.fields": "profile_image_url,username,url",
2020
"media.fields": "url",

app/models/media.rb

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def initialize(attributes = {})
6464
self.follow_redirections
6565
self.url = RequestHelper.normalize_url(self.url) unless self.get_canonical_url
6666
self.try_https
67+
self.remove_parser_specific_parameters
6768
self.parser = nil
6869
end
6970

@@ -275,6 +276,37 @@ def try_https
275276
end
276277
end
277278

279+
# Strips parser-specific (tracking) parameters from the query string of +url+.
#
# The parser class is looked up with +self.class.find_parser_class+. When it
# responds to +urls_parameters_to_remove+, every query pair whose key is in
# that list is dropped and +url+ is reassigned. The URL is left untouched when
# no parser matches, the parser declares no such parameters, or none of the
# listed names is actually present as a query key.
def remove_parser_specific_parameters
  parser_class = self.class.find_parser_class(self.url)
  return unless parser_class.respond_to?(:urls_parameters_to_remove)

  params_to_remove = parser_class.urls_parameters_to_remove
  # Cheap pre-filter: skip URI parsing entirely when no name occurs in the URL.
  return unless params_to_remove.any? { |param| self.url.include?(param) }

  uri = URI.parse(self.url)
  return if uri.query.nil?

  # Work on the list of pairs rather than a Hash so that repeated keys
  # (e.g. "a=1&a=2") survive the round trip.
  pairs = URI.decode_www_form(uri.query)
  kept = pairs.reject { |key, _value| params_to_remove.include?(key) }
  # The name only appeared as a substring (path, value, longer key): do not
  # re-encode a query we did not need to change.
  return if kept.size == pairs.size

  uri.query = kept.empty? ? nil : URI.encode_www_form(kept)

  result_url = uri.to_s
  # Keep a trailing slash if the original URL had one.
  result_url += '/' if url.end_with?('/') && !result_url.end_with?('/')
  self.url = result_url
end
302+
303+
# Returns the first entry of PARSERS that has at least one pattern matching
# +url+, or nil when no parser matches.
def self.find_parser_class(url)
  PARSERS.find do |parser|
    parser.patterns.any? { |pattern| pattern.match?(url) }
  end
end
309+
278310
# Delegates to RequestHelper.get_html, passing this object's url, a Method
# object for set_error, and the given header_options / force_proxy values.
def get_html(header_options = {}, force_proxy = false)
279311
RequestHelper.get_html(self.url, self.method(:set_error), header_options, force_proxy)
280312
end

app/models/parser/dropbox_item.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def type
77

88
# Regular expressions for the Dropbox URLs this parser handles.
def patterns
99
[
10-
/^https?:\/\/(www\.)?dropbox\.com\/sh?\/([^\/]+)/,
10+
/^https?:\/\/(www\.)?dropbox\.com\/([^\/]+)/,
1111
/^https?:\/\/([^\.]+\.)?(dropboxusercontent|dropbox)\.com\/s\/([^\/]+)/,
1212
]
1313
end

app/models/parser/instagram_item.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ def type
1313
# URL patterns handled by this parser: the item and home URL constants.
def patterns
1414
[INSTAGRAM_ITEM_URL, INSTAGRAM_HOME_URL]
1515
end
16+
17+
# Query-string keys to strip from URLs handled by this parser; read by
# remove_parser_specific_parameters in app/models/media.rb.
def urls_parameters_to_remove
18+
['igsh']
19+
end
1620
end
1721

1822
private

app/models/parser/instagram_profile.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ def type
1212
# URL patterns handled by this parser: the profile URL constant.
def patterns
1313
[INSTAGRAM_PROFILE_URL]
1414
end
15+
16+
# Query-string keys to strip from URLs handled by this parser; read by
# remove_parser_specific_parameters in app/models/media.rb.
def urls_parameters_to_remove
17+
['igsh']
18+
end
1519
end
1620

1721
private

app/models/parser/twitter_item.rb

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module Parser
22
class TwitterItem < Base
33
include ProviderTwitter
44

5-
TWITTER_ITEM_URL = /^https?:\/\/([^\.]+\.)?twitter\.com\/((%23|#)!\/)?(?<user>[^\/]+)\/status\/(?<id>[0-9]+).*/
5+
TWITTER_ITEM_URL = /^https?:\/\/([^\.]+\.)?(twitter|x)\.com\/((%23|#)!\/)?(?<user>[^\/]+)\/status\/(?<id>[0-9]+).*/
66

77
class << self
88
def type
@@ -49,7 +49,7 @@ def parse_data_for_parser(_doc, _original_url, _jsonld_array)
4949
description: raw_data['text'].squish,
5050
author_picture: raw_user_data['profile_image_url'].gsub('_normal', ''),
5151
published_at: raw_data['created_at'],
52-
html: html_for_twitter_item(url),
52+
html: html_for_twitter_item(url,raw_data['lang']),
5353
author_name: raw_user_data['name'],
5454
})
5555
end
@@ -67,11 +67,13 @@ def get_twitter_item_picture(parsed_data)
6767
item_media ? item_media.dig(0, 'url') : ''
6868
end
6969

70-
# Builds the embed markup for a tweet: a "twitter-tweet" blockquote holding an
# empty paragraph tagged with the tweet language and a link to the tweet,
# followed by the widgets.js loader script.
#
# url  - tweet permalink; an x.com host is rewritten to twitter.com
#        (presumably because the embed script expects twitter.com links;
#        confirm against the widgets documentation).
# lang - language code for the tweet, or nil when the API response has none;
#        nil yields an empty lang attribute instead of raising TypeError.
#
# Returns the HTML as a String.
def html_for_twitter_item(url, lang = nil)
  # Rewrite only the host. A blanket replacement of "x.com" would also mangle
  # query strings or paths that contain it (e.g. "?ref=netflix.com").
  embed_url = url.sub(%r{\A(https?://(?:[^./]+\.)?)x\.com(?=/|\z)}, '\1twitter.com')

  '<blockquote class="twitter-tweet">' \
    "<p lang=\"#{lang}\" dir=\"ltr\"></p>" \
    "<a href=\"#{embed_url}\"></a>" \
    '</blockquote>' \
    '<script async src="https://platform.twitter.com/widgets.js" charset="utf-8">' \
    '</script>'
end
7678
end
7779
end

0 commit comments

Comments
 (0)