Skip to content

Commit

Permalink
Update Woblink get books plugin to deal with website changes
Browse files Browse the repository at this point in the history
Merge branch 'master' of https://github.com/t3d/calibre
  • Loading branch information
kovidgoyal committed May 21, 2014
2 parents 6c1dc2d + 8632be2 commit 35d3531
Showing 1 changed file with 53 additions and 41 deletions.
94 changes: 53 additions & 41 deletions src/calibre/gui2/store/stores/woblink_plugin.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 7 # Needed for dynamic plugin loading
store_version = 8 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011-2014, Tomasz Długosz <[email protected]>'
__docformat__ = 'restructuredtext en'

import re
import urllib
from base64 import b64encode
from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser, url_slash_cleaner
from calibre import url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog

from calibre.ebooks.chardet import strip_encoding_declarations
from calibre.utils.ipc.simple_worker import fork_job, WorkerError

js_browser = '''
from calibre.web.jsbrowser.browser import Browser, Timeout
import urllib
def get_results(url, timeout):
browser = Browser(default_timeout=timeout)
browser.visit(url)
browser.wait_for_element('#nw_content_list')
return browser.html
'''

class WoblinkStore(BasicStoreConfig, StorePlugin):

def open(self, parent=None, detail_item=None, external=False):
Expand Down Expand Up @@ -51,42 +63,42 @@ def search(self, query, max_results=10, timeout=60):
else:
url += '&limit=20'

br = browser()

counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
if counter <= 0:
break

id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
if not id:
continue

cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_opcjezakupu_cena"]/span/text()'))
price = re.sub('\.', ',', price)
formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_formaty"]/span/text()'))

s = SearchResult()
s.cover_url = 'http://woblink.com' + cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price + ' zł'
s.detail_item = id.strip()
s.formats = formats

if 'EPUB DRM' in formats:
s.drm = SearchResult.DRM_LOCKED

counter -= 1
yield s
else:
s.drm = SearchResult.DRM_UNLOCKED

counter -= 1
yield s

try:
results = fork_job(js_browser,'get_results', (url, timeout,), module_is_source_code=True)
except WorkerError as e:
raise Exception('Could not get results: %s'%e.orig_tb)
doc = html.fromstring(strip_encoding_declarations(results['result']))
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'):
if counter <= 0:
break

id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
if not id:
continue

cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="nw_opcjezakupu_cena"]/text()'))
formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))

s = SearchResult()
s.cover_url = 'http://woblink.com' + cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price + ' zł'
s.detail_item = id.strip()
s.formats = formats

if 'DRM' in formats:
s.drm = SearchResult.DRM_LOCKED

counter -= 1
yield s
else:
s.drm = SearchResult.DRM_UNLOCKED

counter -= 1
yield s

0 comments on commit 35d3531

Please sign in to comment.