Skip to content

Commit

Permalink
Merge branch 'Fix_abstract' into 'develop'
Browse files: browse the repository at this point in the history
Fix abstract

See merge request inf1603/irisaparser!35
  • Loading branch information
Certescertes committed May 22, 2022
2 parents 0eaaadb + 995ac8a commit f71d24e
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions irisaparser/abstract_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
def abstract_extractor(parsed):
abstr = parsed["content"].upper().find('ABSTRACT')
intro = parsed["content"].upper().find('INTRODUCTION')
if intro == -1:
intro = parsed["content"].upper().find('I N T R O D U C T I O N')
end = len(parsed["content"])

text = parsed["content"]
Expand All @@ -18,11 +20,19 @@ def abstract_extractor(parsed):
return x
return "error"

if intro == -1:
splited_text = text[abstr:].split("\n\n")

for x in splited_text:
if len(x) > 200:
return x
return "error"

else:
cut = 0
for x in range(intro, abstr, -1):
if text[x] == "\n" and text[x-1] == "\n":
cut = x
break

return text[abstr+8:cut] if end-intro > 6000 else text[abstr+8:cut].split("\n\n")[0]
return text[abstr+8:cut] if end-intro > 6000 else text[abstr+8:cut].split("\n\n")[0]

0 comments on commit f71d24e

Please sign in to comment.