-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9d3e906
commit 3d0e487
Showing
13 changed files
with
544 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ wandb | |
notebooks | ||
results | ||
data | ||
slices | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import io | ||
|
||
import pypdfium2 | ||
import streamlit as st | ||
from surya.detection import batch_detection | ||
from surya.model.detection.segformer import load_model, load_processor | ||
from surya.model.recognition.model import load_model as load_rec_model | ||
from surya.model.recognition.processor import load_processor as load_rec_processor | ||
from surya.postprocessing.heatmap import draw_polys_on_image | ||
from surya.ocr import run_ocr | ||
from surya.postprocessing.text import draw_text_on_image | ||
from PIL import Image | ||
from surya.languages import CODE_TO_LANGUAGE | ||
from surya.input.langs import replace_lang_with_code | ||
|
||
|
||
@st.cache_resource()
def load_det_cached():
    """Load the text-detection model together with its processor.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of rebuilding them on every script rerun.

    Returns:
        A ``(model, processor)`` tuple produced by ``load_model`` and
        ``load_processor``.
    """
    detector = load_model()
    detector_processor = load_processor()
    return detector, detector_processor
|
||
|
||
@st.cache_resource()
def load_rec_cached():
    """Load the text-recognition model together with its processor.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of rebuilding them on every script rerun.

    Returns:
        A ``(model, processor)`` tuple produced by ``load_rec_model`` and
        ``load_rec_processor``.
    """
    recognizer = load_rec_model()
    recognizer_processor = load_rec_processor()
    return recognizer, recognizer_processor
|
||
|
||
def text_detection(img):
    """Run text detection on a single image and visualize the result.

    Uses the module-level ``det_model`` and ``det_processor``.

    Args:
        img: Image to analyze. A copy is passed to the drawing helper, so
            the caller's image object is not the one handed to it.

    Returns:
        A ``(det_img, preds)`` tuple: the output of ``draw_polys_on_image``
        for the detected polygons, and the prediction for ``img``.
    """
    # batch_detection works on a list of images; we send one and take its result.
    batch_preds = batch_detection([img], det_model, det_processor)
    preds = batch_preds[0]
    annotated = draw_polys_on_image(preds["polygons"], img.copy())
    return annotated, preds
|
||
|
||
# Function for OCR
def ocr(img, langs):
    """Run OCR on a single image and render the recognized text.

    Uses the module-level detection and recognition models/processors.

    Args:
        img: Image to recognize text in; its ``size`` is used for the
            rendered output.
        langs: Iterable of languages selected for this image. It is not
            modified by this function.

    Returns:
        A ``(rec_img, pred)`` tuple: the output of ``draw_text_on_image``
        for the recognized lines, and the OCR prediction for ``img``.
    """
    # replace_lang_with_code is called for its in-place effect on the list it
    # receives, so convert a private copy rather than rewriting the caller's
    # selection (e.g. the value held by the Streamlit multiselect widget).
    lang_codes = list(langs)
    replace_lang_with_code(lang_codes)
    pred = run_ocr([img], [lang_codes], det_model, det_processor, rec_model, rec_processor)[0]
    rec_img = draw_text_on_image(pred["bboxes"], pred["text_lines"], img.size)
    return rec_img, pred
|
||
|
||
def open_pdf(pdf_file):
    """Open an uploaded PDF as a ``pypdfium2.PdfDocument``.

    Args:
        pdf_file: Object exposing ``getvalue()`` that returns the PDF bytes.

    Returns:
        A ``pypdfium2.PdfDocument`` backed by an in-memory byte stream.
    """
    pdf_bytes = pdf_file.getvalue()
    return pypdfium2.PdfDocument(io.BytesIO(pdf_bytes))
|
||
|
||
@st.cache_data()
def get_page_image(pdf_file, page_num, dpi=96):
    """Render one page of an uploaded PDF as an RGB PIL image.

    Args:
        pdf_file: Uploaded PDF, as accepted by ``open_pdf``.
        page_num: 1-based page number to render.
        dpi: Target resolution; converted to a render scale relative to
            72 units per inch.

    Returns:
        The rendered page converted to mode ``"RGB"``.
    """
    doc = open_pdf(pdf_file)
    try:
        renderer = doc.render(
            pypdfium2.PdfBitmap.to_pil,
            page_indices=[page_num - 1],  # pypdfium2 page indices are 0-based
            scale=dpi / 72,
        )
        # Exhaust the renderer while the document is still open.
        page_image = list(renderer)[0]
        return page_image.convert("RGB")
    finally:
        # Release the native document handle; the original never closed it.
        doc.close()
|
||
|
||
@st.cache_data()
def page_count(pdf_file):
    """Return the number of pages in an uploaded PDF.

    Args:
        pdf_file: Uploaded PDF, as accepted by ``open_pdf``.

    Returns:
        The page count as reported by ``len()`` on the opened document.
    """
    doc = open_pdf(pdf_file)
    try:
        return len(doc)
    finally:
        # Release the native document handle; the original never closed it.
        doc.close()
|
||
|
||
# --- Page layout and model loading -----------------------------------------
# The page config is set before any other Streamlit element is created.
st.set_page_config(layout="wide")
col1, col2 = st.columns([.5, .5])

det_model, det_processor = load_det_cached()
rec_model, rec_processor = load_rec_cached()

st.markdown("""
# Surya OCR Demo
This app will let you try surya, a multilingual OCR model. It supports text detection in any language, and text recognition in 90+ languages.
Notes:
- This works best on documents with printed text.
- Try to keep the image width around 896, especially if you have large text.
- This supports 90+ languages, see [here](https://github.com/VikParuchuri/surya/tree/master/surya/languages.py) for a full list of codes.
Find the project [here](https://github.com/VikParuchuri/surya).
""")

# --- Sidebar inputs ----------------------------------------------------------
in_file = st.sidebar.file_uploader("PDF file or image:", type=["pdf", "png", "jpg", "jpeg", "gif", "webp"])
languages = st.sidebar.multiselect("Languages", sorted(CODE_TO_LANGUAGE.values()), default=["English"], max_selections=4)

# Nothing to show until the user uploads a file.
if in_file is None:
    st.stop()

# --- Load the image to process ----------------------------------------------
filetype = in_file.type
whole_image = False
if "pdf" in filetype:
    # Use a distinct name so the page_count() function is not rebound to an int.
    num_pages = page_count(in_file)
    page_number = st.sidebar.number_input(f"Page number out of {num_pages}:", min_value=1, value=1, max_value=num_pages)

    pil_image = get_page_image(in_file, page_number)
else:
    pil_image = Image.open(in_file).convert("RGB")

text_det = st.sidebar.button("Run Text Detection")
text_rec = st.sidebar.button("Run OCR")

# Run Text Detection
if text_det and pil_image is not None:
    det_img, preds = text_detection(pil_image)
    with col1:
        st.image(det_img, caption="Detected Text", use_column_width=True)
        st.json(preds)

# Run OCR
if text_rec and pil_image is not None:
    rec_img, pred = ocr(pil_image, languages)
    with col1:
        st.image(rec_img, caption="OCR Result", use_column_width=True)
        st.json(pred)

# The uploaded image is always shown in the right-hand column.
with col2:
    st.image(pil_image, caption="Uploaded Image", use_column_width=True)
Oops, something went wrong.