diff --git a/app/models/tag.rb b/app/models/tag.rb index 672d80c8b8..413f6f5004 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -35,7 +35,7 @@ class Tag < ApplicationRecord HASHTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)' HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASHTAG_LAST_SEQUENCE}" - HASHTAG_RE = %r{(?:^|[^/)\w])#(#{HASHTAG_NAME_PAT})}i + HASHTAG_RE = %r{(?<![=/)[:word]])#(#{HASHTAG_NAME_PAT})}i HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i HASHTAG_INVALID_CHARS_RE = /[^[:alnum:]#{HASHTAG_SEPARATORS}]/ diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb index 4d6e5c380b..80b9c861fc 100644 --- a/spec/models/tag_spec.rb +++ b/spec/models/tag_spec.rb @@ -32,44 +32,48 @@ RSpec.describe Tag do expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)#Lawsuit')).to be_nil end + it 'does not match URLs with hashtag-like anchors after an empty query parameter' do + expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)?foo=#Lawsuit')).to be_nil + end + it 'matches #aesthetic' do - expect(subject.match('this is #aesthetic').to_s).to eq ' #aesthetic' + expect(subject.match('this is #aesthetic').to_s).to eq '#aesthetic' end it 'matches digits at the start' do - expect(subject.match('hello #3d').to_s).to eq ' #3d' + expect(subject.match('hello #3d').to_s).to eq '#3d' end it 'matches digits in the middle' do - expect(subject.match('hello #l33ts35k').to_s).to eq ' #l33ts35k' + expect(subject.match('hello #l33ts35k').to_s).to eq '#l33ts35k' end it 'matches digits at the end' do - expect(subject.match('hello #world2016').to_s).to eq ' #world2016' + expect(subject.match('hello #world2016').to_s).to eq '#world2016' end it 'matches underscores at the beginning' do - expect(subject.match('hello #_test').to_s).to eq ' #_test' + expect(subject.match('hello #_test').to_s).to eq '#_test' end it 'matches underscores at the end' do - expect(subject.match('hello #test_').to_s).to eq ' #test_' + expect(subject.match('hello #test_').to_s).to eq '#test_' end it 'matches underscores in the middle' do - expect(subject.match('hello #one_two_three').to_s).to eq ' #one_two_three' + expect(subject.match('hello #one_two_three').to_s).to eq '#one_two_three' end it 'matches middle dots' do - expect(subject.match('hello #one·two·three').to_s).to eq ' #one·two·three' + expect(subject.match('hello #one·two·three').to_s).to eq '#one·two·three' end it 'matches ・unicode in ぼっち・ざ・ろっく correctly' do - expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq ' #ぼっち・ざ・ろっく' + expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq '#ぼっち・ざ・ろっく' end it 'matches ZWNJ' do - expect(subject.match('just add #نرمافزار and').to_s).to eq ' #نرمافزار' + expect(subject.match('just add #نرمافزار and').to_s).to eq '#نرمافزار' end it 'does not match middle dots at the start' do @@ -77,7 +81,7 @@ RSpec.describe Tag do end it 'does not match middle dots at the end' do - expect(subject.match('hello #one·two·three·').to_s).to eq ' #one·two·three' + expect(subject.match('hello #one·two·three·').to_s).to eq '#one·two·three' end it 'does not match purely-numeric hashtags' do