Skip to content

Commit

Permalink
feature-lists: Add sfn templates to fawiki features
Browse files (browse the repository at this point in the history)
This enables revscoring to process shortened footnote templates
as part of fawiki articlequality features.

Bug: T314302
  • Loading branch information
kevinbazira committed Sep 2, 2022
1 parent dd720d1 commit bb9e711
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
38 changes: 31 additions & 7 deletions articlequality/feature_lists/fawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from revscoring.features import wikitext
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import max, sub, log
from revscoring.features.modifiers import log, max, sub
from revscoring.features.wikitext.datasources import Revision
from revscoring.datasources.meta import mappers, filters

Expand All @@ -26,13 +26,35 @@
"Who|چه کسی|چه‌کسی", name="fawiki.revision.who_templates")
main_article_templates = wikitext.revision.template_names_matching(
"Main|اصلی", name="fawiki.main_article_templates")
# Regex fragments for citation template names. The fragments are joined
# with "|" into a single alternation before being handed to
# template_names_matching, so order and spelling are kept exactly.
CITE_TEMPLATES = [
    # Generic citation templates.
    r"Cite",
    # Harvard-style citation templates; each long name is listed next to
    # what appears to be its short alias.
    r"Harvard[_ ]citation[_ ]no[_ ]brackets",
    r"harvnb",
    r"Harvard citation",
    r"harv",
    r"Harvard citation text",
    r"harvtxt",
    r"Harvcoltxt",
    r"Harvcol",
    r"Harvcolnb",
    r"Harvard citations",
    r"harvs",
    r"Harvp",
    # Persian-script citation template name.
    r"یادکرد",
]
cite_templates = wikitext.revision.template_names_matching(
r"cite|یادکرد", name="fawiki.revision.cite_templates")
"|".join(CITE_TEMPLATES), name="fawiki.revision.cite_templates")
# Regex fragments for shortened-footnote template names. They are joined
# with "|" into one alternation for template_names_matching.
SFN_TEMPLATES = [
    r"Shortened footnote template",
    r"sfn",
    r"Sfnp",
    r"Sfnm",
]
# Feature over templates whose name matches any SFN_TEMPLATES alternative;
# the fragments are OR-ed together into a single regex.
shortened_footnote_templates = wikitext.revision.template_names_matching(
"|".join(SFN_TEMPLATES),
name="fawiki.revision.shortened_footnote_templates")
# Shortened footnotes are added to wikitext.revision.ref_tags so that they
# count as references.
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
# They are also added to cite_templates so that they count as templated
# citations.
all_cite_templates = cite_templates + shortened_footnote_templates
proportion_of_templated_references = \
cite_templates / max(wikitext.revision.ref_tags, 1)
non_templated_references = max(wikitext.revision.ref_tags - cite_templates, 0)
all_cite_templates / max(all_ref_tags, 1)
non_templated_references = max(all_ref_tags - all_cite_templates, 0)
non_cite_templates = sub(
wikitext.revision.templates, cite_templates,
wikitext.revision.templates, all_cite_templates,
name="fawiki.revision.non_cite_templates"
)

Expand Down Expand Up @@ -70,8 +92,10 @@
image_links / max(wikitext.revision.content_chars, 1),
category_links,
category_links / max(wikitext.revision.content_chars, 1),
cite_templates,
cite_templates / max(wikitext.revision.content_chars, 1),
all_ref_tags,
all_ref_tags / max(wikitext.revision.content_chars, 1),
all_cite_templates,
all_cite_templates / max(wikitext.revision.content_chars, 1),
proportion_of_templated_references,
non_templated_references,
non_templated_references / max(wikitext.revision.content_chars, 1),
Expand Down
5 changes: 3 additions & 2 deletions tests/feature_lists/test_fawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
def test_cite_templates():
text = """
This is some text with a citation.<ref>{{cite lol|title=Made up}}</ref>
This is some more text. {{foo}} {{{cite}}}
This is some more text. {{foo}} {{{cite}}} {{sfn}} {{Harvard citation}}
I am a new paragraph.<ref>{{cite book|title=The stuff}}</ref>
{{Cite hat|ascii=_n_}}{{یادکرد گربه|ascii=_n_}}
"""
assert solve(fawiki.cite_templates, cache={revision_text: text}) == 4
assert solve(fawiki.all_ref_tags, cache={revision_text: text}) == 4
assert solve(fawiki.all_cite_templates, cache={revision_text: text}) == 6


def test_infobox_templates():
Expand Down

0 comments on commit bb9e711

Please sign in to comment.