Skip to content

Commit

Permalink
[script.subtitles.zimukux] 0.2.1 (#2560)
Browse files Browse the repository at this point in the history
* [script.subtitles.zimukux] 0.2.0

* [script.subtitles.zimukux] 0.2.0

* [script.subtitles.zimukux] 0.2.1

* [script.subtitles.zimukux] 0.2.1
  • Loading branch information
pizzamx authored Jan 1, 2024
1 parent 6d73bd9 commit 08529d6
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 44 deletions.
23 changes: 9 additions & 14 deletions script.subtitles.zimukux/addon.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<addon id="script.subtitles.zimukux" name="ZiMuKuX" version="0.1.9" provider-name="pizzamx">
<addon id="script.subtitles.zimukux" name="ZiMuKuX" version="0.2.1" provider-name="pizzamx">
<requires>
<import addon="xbmc.python" version="3.0.0"/>
<import addon="script.module.beautifulsoup4" version="4.6.2"/>
Expand All @@ -20,6 +20,13 @@
<fanart>resources/fanart.png</fanart>
</assets>
<news>
v0.2.1(2023/12/11)
- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/23 (thanks @jiangpengcheng for solving the cookie issue)

v0.2.0(2023/5/30)
- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/18 (big thanks to lm317379829 for solving the captcha issue)
- Let's bump version a bit

v0.1.9(2022/12/23)
- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/14
- New setting allows showing only the language suffix of the sub file name to prevent the UI from scrolling (slowly!)
Expand All @@ -41,19 +48,7 @@ v0.1.5(2022/4/22)
- Misc. bug fix and code tweaks
- Is COVID-19 gonna last forever?

v0.1.1(2021/6/21)
- Rename addon to script.subtitles.zimukux

V0.1.0 (2021/6/14)
- Episode match logic gets more flexibility
- You can now specify your preferred sub type (srt, ass) and language (be cautious setting this parameter)
- A bit of refactoring to separate crawl logic from Kodi GUI and local file r/w interaction
- Some unit tests

V0.0.3 (2021/5/18)
- Fork from service.subtitles.zimuku V2.0.3
- Episode match for drama to locate the subtitle you need faster
- add support to change site URL on the fly
CHECK https://github.com/pizzamx/zimuku_for_kodi/releases FOR EARLIER RELEASE NOTES...
</news>
</extension>
</addon>
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ msgctxt "#30101"
msgid "Site URL"
msgstr ""

msgctxt "#301011"
msgid "OCR URL"
msgstr "OCR API"

msgctxt "#30200"
msgid "Sub preference"
msgstr ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ msgctxt "#30101"
msgid "Site URL"
msgstr "网址"

msgctxt "#301011"
msgid "OCR URL"
msgstr "OCR API 的地址(请勿随意修改)"

msgctxt "#30200"
msgid "Sub preference"
msgstr "字幕下载偏好"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,13 @@ def handle_params(params):
def run():
global agent, logger

# 获取参数
params = get_params()

logger = Logger()
logger.log(sys._getframe().f_code.co_name, "HANDLE PARAMS:%s" % params)

# 获取url
zimuku_base_url = __addon__.getSetting("ZiMuKuUrl")
tpe = __addon__.getSetting("subtype")
lang = __addon__.getSetting("sublang")
Expand All @@ -227,8 +229,11 @@ def run():
else __addon__.getSetting("proxy_server"))
os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = proxy

ocrUrl= __addon__.getSetting("ocr_url")

# 查询
agent = zmkagnt.Zimuku_Agent(zimuku_base_url, __temp__, logger, Unpacker(),
{'subtype': tpe, 'sublang': lang})
{'subtype': tpe, 'sublang': lang}, ocrUrl)

handle_params(params)
xbmcplugin.endOfDirectory(int(sys.argv[1]))
123 changes: 95 additions & 28 deletions script.subtitles.zimukux/resources/lib/zimuku_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,24 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""

from ast import expr_context

import os
import sys
import time
import json
import base64
import urllib

import requests
from bs4 import BeautifulSoup


class Zimuku_Agent:
def __init__(self, base_url, dl_location, logger, unpacker, settings):
def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl=''):
self.ua = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
self.ZIMUKU_BASE = base_url
self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830'
# self.ZIMUKU_API = '%s/search?q=%%s&vertoken=%%s' % base_url
self.TOKEN_PARAM = 'security_verify_data=313932302c31303830'
self.ZIMUKU_API = '%s/search?q=%%s' % base_url
self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830'
self.DOWNLOAD_LOCATION = dl_location
self.FILE_MIN_SIZE = 1024

Expand All @@ -43,22 +43,18 @@ def __init__(self, base_url, dl_location, logger, unpacker, settings):
self.plugin_settings = settings
self.session = requests.Session()
self.vertoken = ''
self.ocrUrl = ocrUrl

# 一次性调用,获取那个vertoken。目测这东西会过期,不过不管那么多了,感觉过两天验证机制又要变
# self.init_site()
# 一次性调用,获取必需的cookies,验证机制可能之后会变
self.init_site()

def set_setting(self, settings):
    """
    Replace the plugin settings held by this agent.

    Params:
        settings    new value stored in self.plugin_settings
    """
    # for unit-testing purposes
    self.plugin_settings = settings

def init_site(self):
self.session.cookies.set(
'srcurl', '68747470733a2f2f7a696d756b752e6f72672f')
self.get_page(self.ZIMUKU_BASE)

self.get_page(self.INIT_PAGE)
_, resp = self.get_page(self.ZIMUKU_BASE)
self.get_vertoken(resp)
self.get_page(self.INIT_PAGE)

def get_page(self, url, **kwargs):
"""
Expand Down Expand Up @@ -105,6 +101,58 @@ def get_page(self, url, **kwargs):

return headers, http_body

def verify(self, url, append):
    """
    Try to pass the site's captcha challenge for a URL via the OCR service.

    Requests ``url`` with the shared session. When the response status is
    not 200, the page is searched for an element with class ``verifyimg``;
    the value of its ``src`` attribute is POSTed to ``self.ocrUrl`` and the
    recognised text is sent back to the site, hex-encoded, as the
    ``security_verify_img`` query parameter, using the same session.

    Params:
        url     page to request
        append  text placed between ``url`` and ``&security_verify_img=...``
                when building the follow-up URL (callers supply the leading
                ``?`` or ``&...`` part)

    Return:
        None. Any exception is caught and logged instead of being raised.
    """
    s = self.session
    try:
        request_headers = {'User-Agent': self.ua}

        # Let HTTPS connections made through the session retry up to 3 times.
        s.mount('https://', requests.adapters.HTTPAdapter(max_retries=3))

        self.logger.log(sys._getframe().f_code.co_name,
                        '[CHALLENGE VERI-CODE] requests GET [%s]' % (url), level=3)

        http_response = s.get(url, headers=request_headers)

        # Only a non-200 response is treated as a possible captcha page.
        if http_response.status_code == 200:
            return

        soup = BeautifulSoup(http_response.content, 'html.parser')
        # find() returns None when nothing matches, so a page without a
        # captcha image no longer surfaces as an opaque IndexError.
        captcha_img = soup.find(attrs={'class': 'verifyimg'})
        content = captcha_img.get('src') if captcha_img is not None else None
        if content is None:
            self.logger.log(sys._getframe().f_code.co_name,
                            'NO VERI-CODE IMAGE FOUND (target URL: %s)' % (url), level=3)
            return

        # Ask the OCR service to read the captcha. This request deliberately
        # bypasses the session, exactly as the original code did.
        ocr_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'
        }
        response = requests.request(
            "POST", self.ocrUrl, headers=ocr_headers, json={'imgdata': content})
        result_json = json.loads(response.text)
        # Any 'code' other than 1 leaves the recognised text empty.
        text = result_json['result'] if result_json['code'] == 1 else ''

        # Concatenate the hexadecimal code point of every character,
        # without the '0x' prefix.
        encoded = ''.join(format(ord(ch), 'x') for ch in text)

        # Retry the request with the captcha answer attached. The response
        # body is not used.
        # NOTE(review): presumably this call exists to store the verification
        # cookie on the session -- confirm against the site's behaviour.
        get_cookie_url = '%s%s&%s' % (
            url, append, 'security_verify_img=' + encoded)
        s.get(get_cookie_url, headers=request_headers)

    except Exception as e:
        self.logger.log(sys._getframe().f_code.co_name,
                        "ERROR CHALLENGING VERI-CODE(target URL: %s): %s" % (url, e), level=3)

def extract_sub_info(self, sub, lang_info_mode):
"""
从 html 块中解析出字幕信息
Expand Down Expand Up @@ -169,7 +217,7 @@ def extract_sub_info(self, sub, lang_info_mode):
if rating not in ["0", "1", "2", "3", "4", "5"]:
self.logger.log(
sys._getframe().f_code.co_name, "NO RATING AVAILABLE IN (%s), URL: %s" %
(rating_div_str, link),
(rating_div_str, link),
2)
rating = "0"
except:
Expand Down Expand Up @@ -200,7 +248,7 @@ def get_vertoken(self, resp):
self.logger.log(sys._getframe().f_code.co_name,
"Fetching new vertoken form home page")
try:
headers, data = self.get_page(self.ZIMUKU_BASE+'/')
headers, data = self.get_page(self.ZIMUKU_BASE + '/')
hsoup = BeautifulSoup(resp, 'html.parser')
vertoken = hsoup.find(
'input', attrs={'name': 'vertoken'}).attrs.get('value', '')
Expand Down Expand Up @@ -233,17 +281,20 @@ def search(self, title, items):

# vertoken = self.get_vertoken()

get_cookie_url = '%s&%s' % (self.ZIMUKU_API %
(urllib.parse.quote(title)), self.TOKEN_PARAM)
url = self.ZIMUKU_API % urllib.parse.quote(title)
try:
# 10/10/22: 变成搜索要先拿 cookie
self.get_page(url)
self.get_page(get_cookie_url)
# self.get_page(url)
# self.get_page(get_cookie_url)

# 处理验证码逻辑
# self.verify(url, '&chost=zimuku.org')

# 真正的搜索
self.logger.log(sys._getframe().f_code.co_name,
"Search API url: %s" % (url))

url += '&chost=zimuku.org'
_, data = self.get_page(url)
soup = BeautifulSoup(data, 'html.parser')
except Exception as e:
Expand All @@ -268,8 +319,9 @@ def search(self, title, items):
return self.double_filter(subtitle_list, items)

# 2. 直接找不到,看是否存在同一季的链接,进去找
season_name_chn = ('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五')[
int(items['season']) - 1] if s_e != 'N/A' else 'N/A'
season_name_chn = \
('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五')[
int(items['season']) - 1] if s_e != 'N/A' else 'N/A'
season_list = soup.find_all("div", class_="item prel clearfix")

page_list = soup.find('div', class_='pagination')
Expand Down Expand Up @@ -305,12 +357,19 @@ def search(self, title, items):
'Error getting sub page', level=3)
return []
subs = soup.tbody.find_all("tr")
unfiltered_sub_list = []
for sub in reversed(subs):
subtitle = self.extract_sub_info(sub, 2)
unfiltered_sub_list.append(subtitle)
sub_name = sub.a.text
if s_e in sub_name.upper():
subtitle_list.append(self.extract_sub_info(sub, 2))
subtitle_list.append(subtitle)
# 如果匹配到了季,那就得返回了,没有就是没有
return self.double_filter(subtitle_list, items)
# 如果没有匹配到,可能整季度的字幕被打包到一个文件中了,那就把所有的结果都返回让用户自己选择
if len(subtitle_list) > 0:
return self.double_filter(subtitle_list, items)
else:
return unfiltered_sub_list

# 精确查找没找到,那就返回所有
subtitle_list = []
Expand Down Expand Up @@ -438,7 +497,7 @@ def download(self, url):
下载并返回字幕文件列表
Params:
url 字幕详情页面,如 http://zimuku.org/detail/155262.html
url 字幕详情页面,如 https://srtku.com/detail/155262.html
Return:
[], [], [] 返回 3 个列表
Expand All @@ -451,15 +510,19 @@ def download(self, url):
supported_archive_exts = (".zip", ".7z", ".tar", ".bz2", ".rar",
".gz", ".xz", ".iso", ".tgz", ".tbz2", ".cbr")
try:
# 处理验证码逻辑
# self.verify(url, '?')

# Subtitle detail page.
headers, data = self.get_page(url)
soup = BeautifulSoup(data, 'html.parser')
url = soup.find("li", class_="dlsub").a.get('href')

if not (url.startswith(('http://', 'https://'))):
url = urllib.parse.urljoin(self.ZIMUKU_BASE, url)
self.logger.log(sys._getframe().f_code.co_name,
"GET SUB DETAIL PAGE: %s" % (url))

# 处理验证码逻辑
# self.verify(url, '?')

# Subtitle download-list page.
headers, data = self.get_page(url)
Expand Down Expand Up @@ -576,6 +639,10 @@ def download_links(self, links, referer):
try:
self.logger.log(sys._getframe().f_code.co_name,
"DOWNLOAD SUBTITLE: %s" % (url))

# 处理验证码逻辑
# self.verify(url, '?')

# Download subtitle one by one until success.
headers, data = self.get_page(url, Referer=referer)

Expand Down Expand Up @@ -606,13 +673,13 @@ def download_links(self, links, referer):
else:
self.logger.log(
sys._getframe().f_code.co_name, 'File received but too small: %s %d bytes' %
(filename, len(data)),
(filename, len(data)),
level=2)
return '', ''
else:
self.logger.log(
sys._getframe().f_code.co_name, 'Failed to download subtitle from all links: %s' %
(referer),
(referer),
level=2)
return '', ''

Expand Down
12 changes: 11 additions & 1 deletion script.subtitles.zimukux/resources/settings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,17 @@
<control type="edit" format="string">
<heading>30101</heading>
</control>
<default>http://zimuku.org</default>
<default>https://srtku.com</default>
<constraints>
<allowempty>false</allowempty>
</constraints>
</setting>
<setting id="ocr_url" type="string" label="301011" help="">
<level>0</level>
<control type="edit" format="string">
<heading>301011</heading>
</control>
<default></default>
<constraints>
<allowempty>false</allowempty>
</constraints>
Expand Down

0 comments on commit 08529d6

Please sign in to comment.