Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add regex option #231

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements-hw.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ diffimg==0.2.3
tensorflow>=1.13.1,<1.14
matplotlib>=3.0.2
seaborn>=0.9.0
xeger==0.3.5
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ opencv-python>=4.2.0.32
tqdm>=4.23.0
beautifulsoup4>=4.6.0
diffimg==0.2.3
xeger==0.3.5
66 changes: 22 additions & 44 deletions trdg/data_generator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import random as rnd

from PIL import Image, ImageFilter, ImageStat
from PIL import Image, ImageFilter

from trdg import computer_text_generator, background_generator, distorsion_generator

Expand Down Expand Up @@ -49,9 +49,8 @@ def generate(
output_mask,
word_split,
image_dir,
stroke_width=0,
stroke_fill="#282828",
image_mode="RGB",
save_bbox,
save_bbox_dir
):
image = None

Expand All @@ -77,8 +76,6 @@ def generate(
character_spacing,
fit,
word_split,
stroke_width,
stroke_fill,
)
random_angle = rnd.randint(0 - skewing_angle, skewing_angle)

Expand Down Expand Up @@ -131,7 +128,7 @@ def generate(
resized_img = distorted_img.resize(
(new_width, size - vertical_margin), Image.ANTIALIAS
)
resized_mask = distorted_mask.resize((new_width, size - vertical_margin), Image.NEAREST)
resized_mask = distorted_mask.resize((new_width, size - vertical_margin))
background_width = width if width > 0 else new_width + horizontal_margin
background_height = size
# Vertical text
Expand All @@ -144,7 +141,7 @@ def generate(
(size - horizontal_margin, new_height), Image.ANTIALIAS
)
resized_mask = distorted_mask.resize(
(size - horizontal_margin, new_height), Image.NEAREST
(size - horizontal_margin, new_height), Image.ANTIALIAS
)
background_width = size
background_height = new_height + vertical_margin
Expand Down Expand Up @@ -174,26 +171,6 @@ def generate(
"RGB", (background_width, background_height), (0, 0, 0)
)

##############################################################
# Comparing average pixel value of text and background image #
##############################################################
try:
resized_img_st = ImageStat.Stat(resized_img, resized_mask.split()[2])
background_img_st = ImageStat.Stat(background_img)

resized_img_px_mean = sum(resized_img_st.mean[:2]) / 3
background_img_px_mean = sum(background_img_st.mean) / 3

if abs(resized_img_px_mean - background_img_px_mean) < 15:
print("value of mean pixel is too similar. Ignore this image")

print("resized_img_st \n {}".format(resized_img_st.mean))
print("background_img_st \n {}".format(background_img_st.mean))

return
except Exception as err:
return

#############################
# Place text with alignment #
#############################
Expand Down Expand Up @@ -224,29 +201,19 @@ def generate(
(background_width - new_text_width - margin_right, margin_top),
)

#######################
##################################
# Apply gaussian blur #
#######################
##################################

gaussian_filter = ImageFilter.GaussianBlur(
radius=blur if not random_blur else rnd.randint(0, blur)
)
final_image = background_img.filter(gaussian_filter)
final_mask = background_mask.filter(gaussian_filter)

############################################
# Change image mode (RGB, grayscale, etc.) #
############################################

final_image = final_image.convert(image_mode)
final_mask = final_mask.convert(image_mode)

#####################################
# Generate name for resulting image #
#####################################
# We remove spaces if space_width == 0
if space_width == 0:
text = text.replace(" ", "")
if name_format == 0:
image_name = "{}_{}.{}".format(text, str(index), extension)
mask_name = "{}_{}_mask.png".format(text, str(index))
Expand All @@ -261,12 +228,23 @@ def generate(
image_name = "{}_{}.{}".format(text, str(index), extension)
mask_name = "{}_{}_mask.png".format(text, str(index))

image_name = image_name.replace(" ", "_")
image_name = image_name.replace("/", "-")

mask_name = mask_name.replace(" ", "_")
mask_name = mask_name.replace("/", "-")

# Save the image
if out_dir is not None:
final_image.save(os.path.join(out_dir, image_name))
final_image.convert("RGB").save(os.path.join(out_dir, image_name))
if output_mask == 1:
final_mask.save(os.path.join(out_dir, mask_name))
final_mask.convert("RGB").save(os.path.join(out_dir, mask_name))
else:
if output_mask == 1:
return final_image, final_mask
return final_image
return final_image.convert("RGB"), final_mask.convert("RGB")
return final_image.convert("RGB")

if save_bbox:
with open(os.path.join(save_bbox_dir, image_name.rsplit('.', 1)[0]) + ".txt", "w") as f:
bbox = final_mask.getbbox()
f.write(f"{bbox[0]}, {bbox[1]}, {bbox[2]}, {bbox[3]}") #x1, y1, x2, y2
81 changes: 42 additions & 39 deletions trdg/run.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
import argparse
import errno
import os
import os, errno
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

import random as rnd
import string
import sys
from multiprocessing import Pool

from tqdm import tqdm

from trdg.data_generator import FakeTextDataGenerator
from trdg.string_generator import (create_strings_from_dict,
create_strings_from_file,
create_strings_from_wikipedia,
create_strings_randomly)
from trdg.string_generator import (
create_strings_from_dict,
create_strings_from_file,
create_strings_from_wikipedia,
create_strings_randomly,
create_strings_from_regex
)
from trdg.utils import load_dict, load_fonts
from trdg.data_generator import FakeTextDataGenerator
from multiprocessing import Pool


def margins(margin):
Expand Down Expand Up @@ -46,12 +47,20 @@ def parse_arguments():
help="When set, this argument uses a specified text file as source for the text",
default="",
)
parser.add_argument(
"-re",
"--regex",
type=str,
nargs="?",
help="Generate words with some regex. For example emails/account numbers/....",
default="",
)
parser.add_argument(
"-l",
"--language",
type=str,
nargs="?",
help="The language to use, should be fr (French), en (English), es (Spanish), de (German), ar (Arabic), cn (Chinese), ja (Japanese) or hi (Hindi)",
help="The language to use, should be fr (French), en (English), es (Spanish), de (German), ar (Arabic), cn (Chinese), or hi (Hindi)",
default="en",
)
parser.add_argument(
Expand Down Expand Up @@ -264,7 +273,7 @@ def parse_arguments():
type=margins,
nargs="?",
help="Define the margins around the text when rendered. In pixels",
default=(5, 5, 5, 5),
default=(64, 64, 64, 64),
)
parser.add_argument(
"-fi",
Expand Down Expand Up @@ -309,28 +318,18 @@ def parse_arguments():
default=False,
)
parser.add_argument(
"-stw",
"--stroke_width",
type=int,
nargs="?",
help="Define the width of the strokes",
default=0,
)
parser.add_argument(
"-stf",
"--stroke_fill",
type=str,
nargs="?",
help="Define the color of the contour of the strokes, if stroke_width is bigger than 0",
default="#282828",
"-bb",
"--save_bbox",
action="store_true",
help="generates and saves bounding boxes of texts",
default=False
)
parser.add_argument(
"-im",
"--image_mode",
"-bbdir",
"--save_bbox_dir",
type=str,
nargs="?",
help="Define the image mode to be used. RGB is default, L means 8-bit grayscale images, 1 means 1-bit binary images stored with one pixel per byte, etc.",
default="RGB",
help="Directory path to save bounding boxes",
default="./bboxes"
)
return parser.parse_args()

Expand All @@ -350,6 +349,13 @@ def main():
if e.errno != errno.EEXIST:
raise

if args.save_bbox:
try:
os.makedirs(args.save_bbox_dir)
except OSError as e:
if e.errno != errno.EEXIST:
raise

# Creating word list
if args.dict:
lang_dict = []
Expand Down Expand Up @@ -383,6 +389,8 @@ def main():
strings = create_strings_from_wikipedia(args.length, args.count, args.language)
elif args.input_file != "":
strings = create_strings_from_file(args.input_file, args.count)
elif args.regex:
strings = create_strings_from_regex(args.length, args.regex, args.count)
elif args.random_sequences:
strings = create_strings_randomly(
args.length,
Expand All @@ -407,11 +415,10 @@ def main():

if args.language == "ar":
from arabic_reshaper import ArabicReshaper
from bidi.algorithm import get_display

arabic_reshaper = ArabicReshaper()
strings = [
" ".join([get_display(arabic_reshaper.reshape(w)) for w in s.split(" ")[::-1]])
" ".join([arabic_reshaper.reshape(w) for w in s.split(" ")[::-1]])
for s in strings
]
if args.case == "upper":
Expand Down Expand Up @@ -452,9 +459,8 @@ def main():
[args.output_mask] * string_count,
[args.word_split] * string_count,
[args.image_dir] * string_count,
[args.stroke_width] * string_count,
[args.stroke_fill] * string_count,
[args.image_mode] * string_count,
[args.save_bbox] * string_count,
[args.save_bbox_dir] * string_count,
),
),
total=args.count,
Expand All @@ -469,10 +475,7 @@ def main():
) as f:
for i in range(string_count):
file_name = str(i) + "." + args.extension
label = strings[i]
if args.space_width == 0:
label = label.replace(" ", "")
f.write("{} {}\n".format(file_name, label))
f.write("{} {}\n".format(file_name, strings[i]))


if __name__ == "__main__":
Expand Down
11 changes: 11 additions & 0 deletions trdg/string_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import requests

from bs4 import BeautifulSoup
from xeger import Xeger


def create_strings_from_file(filename, count):
Expand Down Expand Up @@ -139,3 +140,13 @@ def create_strings_randomly(length, allow_variable, count, let, num, sym, lang):
current_string += " "
strings.append(current_string[:-1])
return strings


def create_strings_from_regex(length, regex, count):
    """Create `count` strings that each match the pattern `regex`.

    A single Xeger generator is built once and reused for every string.

    Args:
        length: Forwarded to ``Xeger`` as its ``limit`` argument.
            NOTE(review): in xeger this appears to bound unbounded
            repetitions (``*``, ``+``) rather than the total string
            length -- confirm against the xeger documentation.
        regex: Pattern handed to ``Xeger.xeger`` for each generated string.
        count: Number of strings to generate.

    Returns:
        A list with one generated string per iteration of ``range(count)``.
    """
    # Named `generator` so the local does not hide the stdlib `re` module name.
    generator = Xeger(limit=length)
    return [generator.xeger(regex) for _ in range(count)]