Skip to content

Commit

Permalink
Update Woblink - rewrite plugin to use js enabled browser
Browse files Browse the repository at this point in the history
  • Loading branch information
t3d committed May 20, 2014
1 parent 6748b97 commit 8632be2
Showing 1 changed file with 55 additions and 39 deletions.
94 changes: 55 additions & 39 deletions src/calibre/gui2/store/stores/woblink_plugin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 7 # Needed for dynamic plugin loading
store_version = 8 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011-2014, Tomasz Długosz <[email protected]>'
Expand All @@ -16,13 +16,27 @@

from PyQt4.Qt import QUrl

from calibre import browser, url_slash_cleaner
from calibre import url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog

from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.ipc.simple_worker import fork_job, WorkerError

# Source code of a small helper module. search() hands this string to
# fork_job(..., module_is_source_code=True) and asks it to call get_results().
# get_results() loads the URL in calibre's JavaScript-enabled Browser, waits
# until the '#nw_content_list' element is present and returns the page HTML.
js_browser = '''
from calibre.web.jsbrowser.browser import Browser, Timeout
import urllib

def get_results(url, timeout):
    browser = Browser(default_timeout=timeout)
    browser.visit(url)
    browser.wait_for_element('#nw_content_list')
    return browser.html
'''

class WoblinkStore(BasicStoreConfig, StorePlugin):

def open(self, parent=None, detail_item=None, external=False):
Expand All @@ -43,6 +57,7 @@ def open(self, parent=None, detail_item=None, external=False):
d.set_tags(self.config.get('tags', ''))
d.exec_()


def search(self, query, max_results=10, timeout=60):
url = 'http://woblink.com/katalog-ebooki?query=' + urllib.quote_plus(query.encode('utf-8'))
if max_results > 10:
Expand All @@ -51,42 +66,43 @@ def search(self, query, max_results=10, timeout=60):
else:
url += '&limit=20'

br = browser()

counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
if counter <= 0:
break

id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
if not id:
continue

cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_opcjezakupu_cena"]/span/text()'))
price = re.sub('\.', ',', price)
formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_formaty"]/span/text()'))

s = SearchResult()
s.cover_url = 'http://woblink.com' + cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price + ' zł'
s.detail_item = id.strip()
s.formats = formats

if 'EPUB DRM' in formats:
s.drm = SearchResult.DRM_LOCKED

counter -= 1
yield s
else:
s.drm = SearchResult.DRM_UNLOCKED

counter -= 1
yield s

try:
results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True)
except WorkerError as e:
raise Exception('Could not get results: %s'%e.orig_tb)
doc = html.fromstring(xml_to_unicode(str(results['result']),strip_encoding_pats=True)[0])
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'):
if counter <= 0:
break

id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
if not id:
continue

cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="nw_opcjezakupu_cena"]/text()'))
formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))

s = SearchResult()
s.cover_url = 'http://woblink.com' + cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price + ' zł'
s.detail_item = id.strip()
s.formats = formats

if 'DRM' in formats:
s.drm = SearchResult.DRM_LOCKED

counter -= 1
yield s
else:
s.drm = SearchResult.DRM_UNLOCKED

counter -= 1
yield s

0 comments on commit 8632be2

Please sign in to comment.