Skip to content

Commit

Permalink
Make DuplicateArticleDeleter class delete all articles with duplicate…
Browse files Browse the repository at this point in the history
… title and namespace except for the most recently created. This fixes a previous bug whereby the newest article was deleted, and the one kept was the oldest, due to the default order for ActiveRecord.
  • Loading branch information
gabina committed Feb 3, 2025
1 parent bb7b4ad commit da385ac
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 17 deletions.
3 changes: 2 additions & 1 deletion lib/duplicate_article_deleter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def delete_duplicates_in(article_group)
def delete_duplicates(title, ns)
articles = Article.where(title:, namespace: ns, wiki_id: @wiki.id, deleted: false)
.order(:created_at)
keeper = articles.first
# Default order is ascendent, so we want to keep the last article
keeper = articles.last
return [] if keeper.nil?

# Here we must verify that the titles match, since searching by title is case-insensitive.
Expand Down
32 changes: 16 additions & 16 deletions spec/lib/duplicate_article_deleter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,26 @@
deleted: true)
end

it 'marks one deleted when there are two ids for one page' do
first = create(:article,
id: 2262715,
title: 'Kostanay',
namespace: 0,
created_at: 1.day.from_now)
second = create(:article,
id: 46349871,
title: 'Kostanay',
namespace: 0,
created_at: 1.day.ago)
described_class.new.resolve_duplicates([first])
undeleted = Article.where(
it 'marks oldest one deleted when there are two ids for one page' do
new_article = create(:article,
id: 2262715,
title: 'Kostanay',
namespace: 0,
created_at: 1.day.from_now)
duplicate_article = create(:article,
id: 46349871,
title: 'Kostanay',
namespace: 0,
created_at: 1.day.ago)
described_class.new.resolve_duplicates([new_article])
deleted = Article.where(
title: 'Kostanay',
namespace: 0,
deleted: false
deleted: true
)

expect(undeleted.count).to eq(1)
expect(undeleted.first.id).to eq(second.id)
expect(deleted.count).to eq(1)
expect(deleted.first.id).to eq(duplicate_article.id)
end

it 'does not mark any deleted when articles different in title case' do
Expand Down

0 comments on commit da385ac

Please sign in to comment.