-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathe_scraper.py
91 lines (79 loc) · 3.04 KB
/
e_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from bs4 import BeautifulSoup
import requests
import pickle
import cv2 as cv
import numpy as np
#import skimage
import io
from PIL import Image
import time
import matplotlib.pyplot as plt
# --- User configuration -----------------------------------------------------
# Credentials; both are empty placeholders that must be filled in before use.
apikey = ''  # enter your API key here
username = ''  # enter your username here
# Request header dict carrying the User-Agent string for this scraper.
header = {"User-Agent": 'simple scrap v.1.1'}
# (username, apikey) pair -- presumably passed as HTTP basic auth; confirm at the call site.
auth = (username, apikey)
threshold = 20  # pages per data file (limit depends on available RAM)
upper_pagelim = 1020  # leave as is; the site only allows up to 750 pages anyway
size = 128  # edge length of the stored images (all images are rescaled to a 1:1 aspect ratio!)
offset = 0  # start at a specific page: start page = offset * threshold
# Base URL; a page number is expected to be appended after 'page='.
website = 'https://e621.net/posts?page='
tags = ''  # enter the tag query string, e.g. '&tags=rating%3As+dragon+-absurd_res'
class data():
    """In-memory container for one batch of preprocessed images.

    Every image passed to :meth:`add` is centre-cropped to a square, resized
    to ``size`` x ``size``, converted to a single grey channel and min-max
    normalised, then stored next to its tags and numeric id.  A batch can be
    written to / read from ``img_<id>.pickle`` in the current directory.

    Attributes:
        id: identifier of this batch; used to build the pickle file name.
        images: list of arrays, each shaped ``(1, size, size, 1)``.
        gimages: set by :meth:`load` to the same list object as ``images``.
        tags: list with one tags entry per image (stored as given).
        img_id: list of integer image ids, parallel to ``images``.
        noise_rdy: flag initialised to ``False``; not read inside this class.
        size: edge length (pixels) of the stored square images.
    """

    def __init__(self, num, size):
        """Create an empty batch with identifier ``num`` and image edge ``size``."""
        self.id = num
        self.images = []
        self.gimages = []
        self.tags = []
        self.img_id = []
        self.noise_rdy = False
        self.size = size

    def get_id(self):
        """Return the batch identifier given at construction."""
        return self.id

    def add(self, img, tags, img_id):
        """Preprocess ``img`` with :meth:`transform` and append it to the batch.

        Args:
            img: image array accepted by :meth:`transform`.
            tags: tag information for the image; stored unchanged.
            img_id: image id; converted with ``int()`` before storing.
        """
        self.images.append(self.transform(img))
        self.tags.append(tags)
        self.img_id.append(int(img_id))

    def save(self):
        """Pickle ``[images, tags, img_id]`` to ``img_<id>.pickle``."""
        with open(f'img_{self.id}.pickle', 'wb') as handle:
            pickle.dump(
                [self.images, self.tags, self.img_id],
                handle,
                protocol=pickle.HIGHEST_PROTOCOL,
            )

    def load(self):
        """Restore ``images``, ``tags`` and ``img_id`` from ``img_<id>.pickle``.

        ``gimages`` is rebound to the very same list object as ``images``.

        NOTE: unpickling can execute arbitrary code -- only load files that
        were produced by :meth:`save` from a trusted source.
        """
        with open(f'img_{self.id}.pickle', 'rb') as handle:
            self.images, self.tags, self.img_id = pickle.load(handle)
        self.gimages = self.images

    def show(self, num):
        """Display the stored image at index ``num`` with its id as the title."""
        plt.axis("off")
        plt.imshow(self.images[num])
        plt.title(self.img_id[num])
        plt.show()

    def get(self):
        """Return the tuple ``(images, tags, img_id)`` (the live lists, not copies)."""
        return (self.images, self.tags, self.img_id)

    def clean(self):
        """Reset ``images``, ``tags`` and ``img_id`` to fresh empty lists.

        NOTE(review): ``gimages`` is intentionally left untouched to keep the
        existing behaviour; after :meth:`load` it still references the old
        image list -- confirm whether it should be cleared as well.
        """
        self.images = []
        self.tags = []
        self.img_id = []

    def check_id(self, img_id):
        """Return ``True`` if ``int(img_id)`` is already stored in this batch."""
        return int(img_id) in self.img_id

    @staticmethod
    def _normalize(arr):
        """Min-max scale ``arr`` to ``[0, 1]``; a constant array maps to zeros."""
        low = arr.min()
        span = arr.max() - low
        if span == 0:
            # A flat image would otherwise yield 0/0 -> NaN everywhere.
            return np.zeros(arr.shape, dtype=float)
        return (arr - low) / span

    def transform(self, img):
        """Centre-crop, resize, grey-convert and normalise one image.

        Args:
            img: 2-D (grey) or 3-D (height, width, channels) array.  Colour
                input is converted with ``cv.COLOR_BGR2GRAY``, i.e. it is
                treated as BGR -- confirm against the caller.

        Returns:
            Float array of shape ``(1, self.size, self.size, 1)`` with values
            in ``[0, 1]``.
        """
        # Only the two spatial axes decide the crop; the channel axis must
        # never be picked as the "longest" one.
        height, width = img.shape[:2]
        if height >= width:
            # Portrait or square: keep the central `width` rows.
            top = (height - width) // 2
            img = img[top:top + width, :]
        else:
            # Landscape: keep the central `height` columns.
            left = (width - height) // 2
            img = img[:, left:left + height]

        img = cv.resize(img, (self.size, self.size), interpolation=cv.INTER_AREA)
        if img.ndim == 2:
            img = np.expand_dims(img, axis=-1)
        if img.shape[-1] != 1:
            img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

        # Use the instance's own size rather than a module-level global so the
        # reshape always matches the resize performed above.
        return np.reshape(self._normalize(img), (1, self.size, self.size, 1))

    def add_gaussian(self, alpha):
        """Return noisy copies of all stored images together with the originals.

        For every image a noise level is drawn as ``|N(0, alpha)|``; Gaussian
        noise with that standard deviation is added and the result is min-max
        normalised again.

        Args:
            alpha: standard deviation of the distribution the per-image noise
                levels are drawn from (must be >= 0).

        Returns:
            ``(noisy, images)`` where ``noisy`` is an array of shape
            ``(len(images), *image_shape)`` and ``images`` is ``self.images``.
            With no stored images, ``noisy`` has a leading dimension of 0.
        """
        if not self.images:
            return np.empty((0, 1, self.size, self.size, 1)), self.images

        sigmas = np.abs(np.random.normal(0, alpha, len(self.images)))
        noisy = []
        for sigma, img in zip(sigmas, self.images):
            noise = np.random.normal(0, sigma, img.shape)
            # Plain NumPy addition: float arrays need no saturation handling
            # and the array shape is preserved unchanged.
            noisy.append(self._normalize(img + noise))
        # Stack once instead of concatenating inside the loop.
        return np.stack(noisy, axis=0).astype(float, copy=False), self.images