Skip to content

Commit

Permalink
Merge pull request #19 from ZennoLab/task/CML-1508
Browse files Browse the repository at this point in the history
  • Loading branch information
azeriker authored Jun 26, 2024
2 parents 236ed3c + ffd4fba commit 8e1a499
Show file tree
Hide file tree
Showing 17 changed files with 308 additions and 10 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ Supported captcha recognition requests:
- [TurnstileProxylessRequest](https://zenno.link/doc-turnstile-en)
- [TurnstileRequest](https://zenno.link/doc-turnstile-proxy-en)
- [RecaptchaComplexImageTaskRequest](https://zenno.link/doc-complextask-rc-en)
- [HcaptchaComplexImageTaskRequest](https://zenno.link/doc-complextask-hc-en)
- [HcaptchaComplexImageTaskRequest](https://zenno.link/doc-complextask-hc-en)
- [DataDomeCustomTaskRequest](https://docs.capmonster.cloud/docs/captchas/datadome)
5 changes: 4 additions & 1 deletion capmonstercloud_client/CapMonsterCloudClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
((TurnstileProxylessRequest, TurnstileRequest), getTurnstileTimeouts),
((RecaptchaComplexImageTaskRequest, HcaptchaComplexImageTaskRequest,
FunCaptchaComplexImageTaskRequest), getImage2TextTimeouts),
((DataDomeCustomTaskRequest, DataDomeCustomTaskProxylessRequest), getDatadomeTimeouts),
)


Expand Down Expand Up @@ -70,7 +71,9 @@ async def solve_captcha(self, request: Union[RecaptchaV2EnterpriseProxylessReque
TurnstileRequest,
HcaptchaComplexImageTaskRequest,
RecaptchaComplexImageTaskRequest,
FunCaptchaComplexImageTaskRequest],
FunCaptchaComplexImageTaskRequest,
DataDomeCustomTaskProxylessRequest,
DataDomeCustomTaskRequest],
) -> Dict[str, str]:
'''
Non-blocking method for captcha solving.
Expand Down
5 changes: 4 additions & 1 deletion capmonstercloud_client/GetResultTimeouts.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,7 @@ def getGeetestTimeouts() -> GetResultTimeouts:
return GetResultTimeouts(1, 0, 1, 80)

def getTurnstileTimeouts() -> GetResultTimeouts:
return GetResultTimeouts(1, 0, 1, 80)
return GetResultTimeouts(1, 0, 1, 80)

def getDatadomeTimeouts() -> GetResultTimeouts:
    """Return the result-polling timeouts used for DataDome tasks.

    NOTE(review): the meaning of the four positional values is defined by
    ``GetResultTimeouts`` itself, which is not visible here; confirm
    against that class before changing them.
    """
    datadome_timeouts = GetResultTimeouts(1, 0, 1, 80)
    return datadome_timeouts
5 changes: 5 additions & 0 deletions capmonstercloud_client/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ class ZeroImagesErrors(BaseError):
class TaskNotDefinedError(BaseError):
pass

class ExtraParamsError(BaseError):
    """Raised when mutually exclusive request parameters are supplied together."""



class UserAgentNotDefinedError(BaseError):

default_message = 'If "imageUrls" is not defined, then "userAgent" must explicitly specify signature ' \
Expand Down
10 changes: 10 additions & 0 deletions capmonstercloud_client/requests/CustomTaskRequestBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import Optional, List, Dict

from .baseRequest import BaseRequest

class CustomTaskRequestBase(BaseRequest):
    """Common fields shared by every ``CustomTask`` recognition request."""

    # Class (subtype) of the CustomTask; subclasses provide the concrete value.
    captchaClass: str
    # Recognition task type.
    type: str = "CustomTask"
    # Address of the web page that shows the captcha.
    websiteUrl: str
    # Browser User-Agent signature; a modern browser signature is expected.
    userAgent: Optional[str] = None
    # Optional list of strings forwarded as the task's ``domains`` field.
    # NOTE(review): presumably domain names related to the captcha - confirm
    # against the service documentation.
    domains: Optional[List[str]] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import Dict, Union
from pydantic import Field, validator

from .DataDomeCustomTaskRequestBase import DataDomeCustomTaskRequestBase

class DataDomeCustomTaskProxylessRequest(DataDomeCustomTaskRequestBase):
    """DataDome ``CustomTask`` request whose task payload carries no proxy fields.

    ``metadata`` must contain ``datadomeCookie`` and exactly one of
    ``captchaUrl`` / ``htmlPageBase64``.
    """

    metadata: Dict[str, str]

    @validator('metadata')
    def validate_metadata(cls, value):
        """Validate ``metadata`` and drop the unused alternative key.

        Returns:
            A copy of ``value`` without whichever of ``captchaUrl`` /
            ``htmlPageBase64`` was not supplied.

        Raises:
            ValueError: if ``datadomeCookie`` is missing, if both or neither
                of ``captchaUrl`` / ``htmlPageBase64`` are supplied, or if
                the supplied one is not a string.
        """
        if value.get('datadomeCookie') is None:
            raise ValueError('Expect that datadomeCookie will be defined.')

        captcha_url = value.get('captchaUrl')
        html_page = value.get('htmlPageBase64')
        if captcha_url and html_page:
            raise ValueError('Expected only one of [captchaUrl, htmlPageBase64]')
        if captcha_url:
            supplied, unused = 'captchaUrl', 'htmlPageBase64'
        elif html_page:
            supplied, unused = 'htmlPageBase64', 'captchaUrl'
        else:
            raise ValueError('Expected one of [captchaUrl, htmlPageBase64]')

        if not isinstance(value[supplied], str):
            # The message names the key that was actually checked.
            raise ValueError(
                f'Expect that type of {supplied} will be <str>, got {type(value[supplied])}'
            )
        # The unused key can only be present here with a falsy value; strip it
        # so it is not sent to the service.
        return {key: item for key, item in value.items() if key != unused}

    def getTaskDict(self) -> Dict[str, Union[str, int, bool]]:
        """Build the ``task`` payload; optional fields are added only when set."""
        task = {
            'type': self.type,
            'class': self.captchaClass,
            'websiteURL': self.websiteUrl,
            'metadata': self.metadata,
        }
        if self.userAgent is not None:
            task['userAgent'] = self.userAgent
        if self.domains is not None:
            task['domains'] = self.domains
        return task
42 changes: 42 additions & 0 deletions capmonstercloud_client/requests/DataDomeCustomTaskRequest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import Dict, Union
from pydantic import Field, validator
from .DataDomeCustomTaskRequestBase import DataDomeCustomTaskRequestBase
from .proxy_info import ProxyInfo

class DataDomeCustomTaskRequest(DataDomeCustomTaskRequestBase, ProxyInfo):
    """DataDome ``CustomTask`` request that also sends the ``ProxyInfo`` fields.

    ``metadata`` must contain ``datadomeCookie`` and exactly one of
    ``captchaUrl`` / ``htmlPageBase64``.
    """

    metadata: Dict[str, str]

    @validator('metadata')
    def validate_metadata(cls, value):
        """Validate ``metadata`` and drop the unused alternative key.

        Returns:
            A copy of ``value`` without whichever of ``captchaUrl`` /
            ``htmlPageBase64`` was not supplied.

        Raises:
            TypeError: if ``datadomeCookie`` is missing, if both or neither
                of ``captchaUrl`` / ``htmlPageBase64`` are supplied, or if
                the supplied one is not a string.
        """
        if value.get('datadomeCookie') is None:
            raise TypeError('Expect that datadomeCookie will be defined.')

        captcha_url = value.get('captchaUrl')
        html_page = value.get('htmlPageBase64')
        if captcha_url and html_page:
            raise TypeError('Expected only one of [captchaUrl, htmlPageBase64]')
        if captcha_url:
            supplied, unused = 'captchaUrl', 'htmlPageBase64'
        elif html_page:
            supplied, unused = 'htmlPageBase64', 'captchaUrl'
        else:
            raise TypeError('Expected one of [captchaUrl, htmlPageBase64]')

        if not isinstance(value[supplied], str):
            # The message names the key that was actually checked.
            raise TypeError(
                f'Expect that type of {supplied} will be <str>, got {type(value[supplied])}'
            )
        # The unused key can only be present here with a falsy value; strip it
        # so it is not sent to the service.
        return {key: item for key, item in value.items() if key != unused}

    def getTaskDict(self) -> Dict[str, Union[str, int, bool]]:
        """Build the ``task`` payload, including the proxy settings.

        ``userAgent`` and ``domains`` are added only when they are set, so an
        unset ``domains`` is omitted instead of being sent as ``None``.
        """
        task = {
            'type': self.type,
            'class': self.captchaClass,
            'websiteURL': self.websiteUrl,
            'proxyType': self.proxyType,
            'proxyAddress': self.proxyAddress,
            'proxyPort': self.proxyPort,
            'proxyLogin': self.proxyLogin,
            'proxyPassword': self.proxyPassword,
            'metadata': self.metadata,
        }
        if self.userAgent is not None:
            task['userAgent'] = self.userAgent
        if self.domains is not None:
            task['domains'] = self.domains
        return task
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Dict, Union
from pydantic import Field

from .CustomTaskRequestBase import CustomTaskRequestBase

class DataDomeCustomTaskRequestBase(CustomTaskRequestBase):
    """Base for DataDome requests: pins the task type and captcha class defaults."""

    # Task type value used for DataDome requests.
    type: str = Field(default='CustomTask')
    # CustomTask subtype identifying DataDome.
    captchaClass: str = Field(default='DataDome')
49 changes: 45 additions & 4 deletions capmonstercloud_client/requests/HcaptchaComplexImageTask.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from typing import Dict, Union
from typing import Dict, Union, List, Optional
from pydantic import Field, validator

from .ComplexImageTaskBase import ComplexImageTaskRequestBase
from ..exceptions import NumbersImagesErrors, ZeroImagesErrors, TaskNotDefinedError
from ..exceptions import NumbersImagesErrors, ZeroImagesErrors, TaskNotDefinedError, ExtraParamsError

class HcaptchaComplexImageTaskRequest(ComplexImageTaskRequestBase):

captchaClass: str = Field(default='hcaptcha')
metadata : Dict[str, str]
exampleImageUrls: Optional[List[str]]
exampleImagesBase64: Optional[List[str]]

@validator('metadata')
def validate_metadata(cls, value):
Expand All @@ -16,13 +18,43 @@ def validate_metadata(cls, value):
else:
return value

@validator('exampleImageUrls')
def validate_example_urls_array(cls, value):
    """Validate ``exampleImageUrls``: ``None`` or a one-element list/tuple of strings.

    The validator has its own name so that it does not collide with the
    other validators defined in this class.

    Raises:
        TypeError: if the value is not a list/tuple or holds a non-string item.
        NumbersImagesErrors: if more than one item is given.
        ZeroImagesErrors: if the collection is empty.
    """
    if value is not None:
        if not isinstance(value, (list, tuple)):
            raise TypeError(f'Expect that type exampleImageUrls array will be <list> or <tuple>, got {type(value)}')
        elif len(value) > 1:
            raise NumbersImagesErrors(f'Maximum number of images in list 1, got {len(value)}')
        elif not len(value):
            raise ZeroImagesErrors(f'At least one image url expected, got {len(value)}')
        # Check for each element type
        contain_types = [isinstance(x, str) for x in value]
        if not all(contain_types):
            raise TypeError(f'Next images from exampleImageUrls array are not string: {contain_types}')
    return value

@validator('exampleImagesBase64')
def validate_example_images_array(cls, value):
    """Validate ``exampleImagesBase64``: ``None`` or a one-element list/tuple of strings.

    The validator has its own name so that it does not collide with the
    other validators defined in this class.

    Raises:
        TypeError: if the value is not a list/tuple or holds a non-string item.
        NumbersImagesErrors: if more than one item is given.
        ZeroImagesErrors: if the collection is empty.
    """
    if value is not None:
        if not isinstance(value, (list, tuple)):
            raise TypeError(f'Expect that type exampleImagesBase64 array will be <list> or <tuple>, got {type(value)}')
        elif len(value) > 1:
            raise NumbersImagesErrors(f'Maximum number of images in list 1, got {len(value)}')
        elif not len(value):
            raise ZeroImagesErrors(f'At least one image base64 expected, got {len(value)}')
        # Check for each element type
        contain_types = [isinstance(x, str) for x in value]
        if not all(contain_types):
            raise TypeError(f'Next images from exampleImagesBase64 array are not string: {contain_types}')
    return value

@validator('imagesUrls')
def validate_urls_array(cls, value):
if value is not None:
if not isinstance(value, (list, tuple)):
raise TypeError(f'Expect that type imagesUrls array will be <list> or <tuple>, got {type(value)}')
elif len(value) > 18:
raise NumbersImagesErrors(f'Maximum numbers images in list 18, got {len(value)}')
raise NumbersImagesErrors(f'Maximum number of images in list 18, got {len(value)}')
elif not len(value):
raise ZeroImagesErrors(f'At least one image url expected, got {len(value)}')
# Check for each element type
Expand All @@ -37,7 +69,7 @@ def validate_images_array(cls, value):
if not isinstance(value, (list, tuple)):
raise TypeError(f'Expect that type imagesBase64 array will be <list> or <tuple>, got {type(value)}')
elif len(value) > 18:
raise NumbersImagesErrors(f'Maximum numbers images in list 18, got {len(value)}')
raise NumbersImagesErrors(f'Maximum number of images in list 18, got {len(value)}')
elif not len(value):
raise ZeroImagesErrors(f'At least one image base64 expected, got {len(value)}')
# Check for each element type
Expand All @@ -64,6 +96,15 @@ def getTaskDict(self) -> Dict[str, Union[str, int, bool]]:

task['metadata'] = self.metadata

if self.exampleImageUrls and self.exampleImagesBase64:
raise ExtraParamsError('Expect only one of [exampleImageUrls, exampleImagesBase64]')

if self.exampleImageUrls is not None:
task['exampleImageUrls'] = self.exampleImageUrls

if self.exampleImagesBase64 is not None:
task['exampleImagesBase64'] = self.exampleImagesBase64

if self.userAgent is not None:
task['userAgent'] = self.userAgent

Expand Down
2 changes: 2 additions & 0 deletions capmonstercloud_client/requests/HcaptchaProxylessRequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ def getTaskDict(self) -> Dict[str, Union[str, int, bool]]:
task['userAgent'] = self.user_agent
if self.cookies is not None:
task['cookies'] = self.cookies
if self.fallbackToActualUA is not None:
task['fallbackToActualUA'] = self.fallbackToActualUA
return task
2 changes: 2 additions & 0 deletions capmonstercloud_client/requests/HcaptchaRequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@ def getTaskDict(self) -> Dict[str, Union[str, int, bool]]:
task['userAgent'] = self.user_agent
if self.cookies is not None:
task['cookies'] = self.cookies
if self.fallbackToActualUA is not None:
task['fallbackToActualUA'] = self.fallbackToActualUA

return task
3 changes: 2 additions & 1 deletion capmonstercloud_client/requests/HcaptchaRequestBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ class HcaptchaRequestBase(BaseRequest):
is_invisible: Optional[bool] = Field(default=None)
data: Optional[str] = Field(default=None)
user_agent: Optional[str] = Field(default=None)
cookies: Optional[str] = Field(default=None)
cookies: Optional[str] = Field(default=None)
fallbackToActualUA: Optional[bool] = Field(default=None)
4 changes: 3 additions & 1 deletion capmonstercloud_client/requests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
from .HcaptchaComplexImageTask import HcaptchaComplexImageTaskRequest
from .RecaptchaComplexImageTask import RecaptchaComplexImageTaskRequest
from .baseRequest import BaseRequest
from .DataDomeCustomTaskRequest import DataDomeCustomTaskRequest
from .DataDomeCustomTaskProxylessRequest import DataDomeCustomTaskProxylessRequest

REQUESTS = ['RecaptchaV2EnterpiseRequest', 'RecaptchaV2EnterpriseProxylessRequest',
'RecaptchaV2ProxylessRequest', 'RecaptchaV2Request', 'RecaptchaV3ProxylessRequest',
'ImageToTextRequest', 'FuncaptchaProxylessRequest', 'FuncaptchaRequest',
'GeetestRequest', 'GeetestProxylessRequest', 'HcaptchaProxylessRequest',
'HcaptchaRequest']
'HcaptchaRequest', 'DataDomeCustomTaskRequest', 'DataDomeCustomTaskProxylessRequest']
2 changes: 1 addition & 1 deletion capmonstercloud_client/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.3.4
1.4.0
37 changes: 37 additions & 0 deletions examples/datadome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
import time
import asyncio

from capmonstercloudclient.requests import DataDomeCustomTaskProxylessRequest
from capmonstercloudclient import ClientOptions, CapMonsterClient

async def solve_captcha_sync(num_requests):
    """Solve the captcha ``num_requests`` times, one request after another.

    Each request is awaited to completion before the next one starts.
    Uses the module-level ``cap_monster_client`` and ``datadome_request``.
    """
    responses = []
    for _ in range(num_requests):
        responses.append(await cap_monster_client.solve_captcha(datadome_request))
    return responses

async def solve_captcha_async(num_requests):
    """Solve the captcha ``num_requests`` times concurrently.

    All requests are scheduled as tasks first and then gathered; a failed
    request is returned as its exception object instead of being raised.
    Uses the module-level ``cap_monster_client`` and ``datadome_request``.
    """
    pending = []
    for _ in range(num_requests):
        solve_coro = cap_monster_client.solve_captcha(datadome_request)
        pending.append(asyncio.create_task(solve_coro))
    return await asyncio.gather(*pending, return_exceptions=True)

if __name__ == '__main__':
    # Client configured from the API_KEY environment variable.
    key = os.getenv('API_KEY')
    client_options = ClientOptions(api_key=key)
    cap_monster_client = CapMonsterClient(options=client_options)

    # Sample DataDome challenge: captcha URL plus the matching datadome cookie.
    metadata = {
        'captchaUrl': 'https://geo.captcha-delivery.com/captcha/?initialCid=AHrlqAAAAAMAJxx4dfgwjzwAQW0ctQ%3D%3D&hash=D66B23AC3F48A302A7654416846381&cid=d3k5rbDsu8cq0kmPHISS3hsC3f4qeL_K12~G33PrE4fbkmDYSul6l0Ze_aG5sUHLKG0676UpTv6GFvUgIActglZF33GTodOoRhEDkMMsuWTodlYa3YYQ9xKy9J89PAWh&t=fe&referer=https%3A%2F%2Fantoinevastel.com%2Fbots%2Fdatadome&s=21705&e=04fc682817ba89bf8fa4b18031fa53294fa0fb7449d95c036a1986413e6dfc7d',
        'datadomeCookie': 'datadome=d3k5rbDsu8cq0kmPHISS3hsC3f4qeL_K12~G33PrE4fbkmDYSul6l0Ze_aG5sUHLKG0676UpTv6GFvUgIActglZF33GTodOoRhEDkMMsuWTodlYa3YYQ9xKy9J89PAWh',
    }
    datadome_request = DataDomeCustomTaskProxylessRequest(
        websiteUrl='https://antoinevastel.com/bots/datadome',
        metadata=metadata,
    )
    nums = 3

    # Sequential run: requests are solved one after another.
    sync_start = time.time()
    sync_responses = asyncio.run(solve_captcha_sync(nums))
    sync_rate = 1 / ((time.time() - sync_start) / nums)
    print(f'average execution time sync {sync_rate:0.2f} '
          f'resp/sec\nsolution: {sync_responses[0]}')

    # Concurrent run: all requests are in flight at the same time.
    async_start = time.time()
    async_responses = asyncio.run(solve_captcha_async(nums))
    async_rate = 1 / ((time.time() - async_start) / nums)
    print(f'average execution time async {async_rate:0.2f} '
          f'resp/sec\nsolution: {async_responses[0]}')
41 changes: 41 additions & 0 deletions test/datadome_response_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import unittest
import asyncio
import os

from pydantic.error_wrappers import ValidationError
from capmonstercloudclient.requests import DataDomeCustomTaskProxylessRequest
from capmonstercloudclient import CapMonsterClient, ClientOptions

def get_all_keys(dictionary):
    """Return every key of *dictionary*, including keys of nested dicts.

    Keys are listed depth-first: a key is followed by the keys of its own
    nested dict value (if any) before the next sibling key. Dict subclasses
    are treated as nested dicts too.
    """
    all_keys = []
    for key, value in dictionary.items():
        all_keys.append(key)
        if isinstance(value, dict):
            all_keys.extend(get_all_keys(value))
    return all_keys

class DataDomeOutsTest(unittest.TestCase):
    """Live test: a solved DataDome task must expose the expected result keys."""

    def testOuts(self):
        """Solve a sample DataDome captcha and check the keys of the result.

        Uses the API key from the ``API_KEY`` environment variable and
        calls the client's ``solve_captcha`` for a real request.
        """
        required_outs = ['domains', 'datadome', 'cookies']
        api_key = os.getenv('API_KEY')
        options = ClientOptions(api_key=api_key)
        client = CapMonsterClient(options)
        metadata = {
            'captchaUrl': 'https://geo.captcha-delivery.com/captcha/?initialCid=AHrlqAAAAAMAJxx4dfgwjzwAQW0ctQ%3D%3D&hash=D66B23AC3F48A302A7654416846381&cid=d3k5rbDsu8cq0kmPHISS3hsC3f4qeL_K12~G33PrE4fbkmDYSul6l0Ze_aG5sUHLKG0676UpTv6GFvUgIActglZF33GTodOoRhEDkMMsuWTodlYa3YYQ9xKy9J89PAWh&t=fe&referer=https%3A%2F%2Fantoinevastel.com%2Fbots%2Fdatadome&s=21705&e=04fc682817ba89bf8fa4b18031fa53294fa0fb7449d95c036a1986413e6dfc7d',
            'datadomeCookie': 'datadome=d3k5rbDsu8cq0kmPHISS3hsC3f4qeL_K12~G33PrE4fbkmDYSul6l0Ze_aG5sUHLKG0676UpTv6GFvUgIActglZF33GTodOoRhEDkMMsuWTodlYa3YYQ9xKy9J89PAWh',
        }

        request = DataDomeCustomTaskProxylessRequest(
            websiteUrl='https://antoinevastel.com/bots/datadome',
            metadata=metadata,
        )
        result = asyncio.run(client.solve_captcha(request))

        # Flatten the result keys once; assertIn reports the missing key and
        # the available keys on failure.
        result_keys = get_all_keys(result)
        for expected_key in required_outs:
            self.assertIn(expected_key, result_keys)


if __name__ == '__main__':
unittest.main()

Loading

0 comments on commit 8e1a499

Please sign in to comment.