parent f22df1ba57
commit edf7926322
@@ -4,7 +4,7 @@
 This project adds a Douban API provider implementation; it has to be placed in calibre-web's metadata_provider directory.
 
-### Usage (new)
+### Usage
 
 Copy `src/NewDouban.py` into the `calibre-web/cps/metadata_provider/` directory and restart the application.
 
 
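The usage note above works because calibre-web picks up provider modules dropped into `cps/metadata_provider/`, so no further wiring is needed after the copy. For orientation, here is a minimal sketch of the provider shape that directory expects, based on the `Metadata`/`MetaRecord` interface shown later in this diff; the class name and strings are placeholders, not part of the project:

```python
# Illustrative skeleton only -- the real provider in this repo is src/NewDouban.py.
from typing import List, Optional

from cps.services.Metadata import Metadata, MetaRecord


class ExampleProvider(Metadata):      # hypothetical name
    __name__ = "Example Provider"
    __id__ = "exampleprovider"

    def search(self, query: str, generic_cover: str = "", locale: str = "en") -> Optional[List[MetaRecord]]:
        if not self.active:
            return None
        return []                      # a real provider returns MetaRecord objects here
```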
@@ -12,11 +12,9 @@
 Reference documentation: https://fugary.com/?p=238
 
 ### Usage (deprecated)
-**Use with newer calibre-web, 0.6.17 and above**
-
 ~~After changing the `doubanUrl` address in `src/douban.py`, copy `src/douban.py` into the `calibre-web/cps/metadata_provider/` directory and restart the application.~~
 
 ~~Since the Douban API is no longer open for public use, this Douban API provider needs to connect to `simple-boot-douban-api`.~~
-
+For versions below 0.6.17, please download: https://github.com/fugary/calibre-web-douban-api/releases/tag/0.6.16
 
 
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote
 from lxml import etree
 from functools import lru_cache
 
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
 
 DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"
 DOUBAN_BOOK_CAT = "1001"
@@ -28,7 +28,7 @@ class NewDouban(Metadata):
         self.searcher = DoubanBookSearcher()
         super().__init__()
 
-    def search(self, query, generic_cover=""):
+    def search(self, query: str, generic_cover: str = "", locale: str = "en"):
         if self.active:
             return self.searcher.search_books(query)
 
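The widened signature accepts the `locale` keyword that newer calibre-web versions pass to providers; `search_books(query)` itself ignores it. When the provider is switched off, `self.active` is false, the body falls through, and `search` returns `None`, which the abstract base's `Optional[List[MetaRecord]]` return type (later in this diff) allows. A small hedged sketch of that behaviour, assuming `NewDouban` is importable from wherever the module was installed:

```python
# Hypothetical direct use while debugging; calibre-web normally drives this itself.
from NewDouban import NewDouban   # adjust the import path to where the module lives

provider = NewDouban()
provider.set_status(False)                        # simulate the provider being switched off
print(provider.search("anything", locale="en"))   # -> None, because self.active is False
provider.set_status(True)                         # re-enable before real searches
```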
@@ -94,55 +94,58 @@ class DoubanBookHtmlParser:
         self.id_pattern = re.compile(".*/subject/(\\d+)/?")
 
     def parse_book(self, url, book_content):
-        book = {}
+        book = MetaRecord(
+            id="",
+            title="",
+            authors=[],
+            publisher="",
+            description="",
+            url="",
+            source=MetaSourceInfo(
+                id=PROVIDER_ID,
+                description=PROVIDER_NAME,
+                link="https://book.douban.com/"
+            )
+        )
         html = etree.HTML(book_content)
         title_element = html.xpath("//span[@property='v:itemreviewed']")
-        book['title'] = self.get_text(title_element)
+        book.title = self.get_text(title_element)
         share_element = html.xpath("//a[@data-url]")
         if len(share_element):
             url = share_element[0].attrib['data-url']
-        book['url'] = url
+        book.url = url
         id_match = self.id_pattern.match(url)
         if id_match:
-            book['id'] = id_match.group(1)
+            book.id = id_match.group(1)
         img_element = html.xpath("//a[@class='nbg']")
         if len(img_element):
             cover = img_element[0].attrib['href']
             if not cover or cover.endswith('update_image'):
-                book['cover'] = ''
+                book.cover = ''
             else:
-                book['cover'] = cover
+                book.cover = cover
         rating_element = html.xpath("//strong[@property='v:average']")
-        book['rating'] = self.get_rating(rating_element)
+        book.rating = self.get_rating(rating_element)
         elements = html.xpath("//span[@class='pl']")
-        book['authors'] = []
-        book['publisher'] = ''
         for element in elements:
             text = self.get_text(element)
-            if text.startswith("作者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
-            elif text.startswith("译者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
+            if text.startswith("作者") or text.startswith("译者"):
+                book.authors.extend([self.get_text(author_element) for author_element in
+                                     filter(self.author_filter, element.findall("..//a"))])
             elif text.startswith("出版社"):
-                book['publisher'] = self.get_tail(element)
+                book.publisher = self.get_tail(element)
             elif text.startswith("副标题"):
-                book['title'] = book['title'] + ':' + self.get_tail(element)
+                book.title = book.title + ':' + self.get_tail(element)
             elif text.startswith("出版年"):
-                book['publishedDate'] = self.get_tail(element)
+                book.publishedDate = self.get_tail(element)
             elif text.startswith("丛书"):
-                book['series'] = self.get_text(element.getnext())
+                book.series = self.get_text(element.getnext())
         summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']")
-        book['description'] = ''
         if len(summary_element):
-            book['description'] = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
+            book.description = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
         tag_elements = html.xpath("//a[contains(@class, 'tag')]")
         if len(tag_elements):
-            book['tags'] = [self.get_text(tag_element) for tag_element in tag_elements]
-        book['source'] = {
-            "id": PROVIDER_ID,
-            "description": PROVIDER_NAME,
-            "link": "https://book.douban.com/"
-        }
+            book.tags = [self.get_text(tag_element) for tag_element in tag_elements]
         return book
 
     def get_rating(self, rating_element):
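Net effect of this hunk: `parse_book` now fills in a `MetaRecord` dataclass (with the provider details carried by `MetaSourceInfo`) instead of an ad-hoc dict, so callers read `book.title` rather than `book['title']`. If the old dict shape is ever needed again, for logging or quick tests, the dataclass converts back with `dataclasses.asdict`. A minimal sketch with placeholder values, using the dataclass definitions shown in the next hunk; the real `PROVIDER_ID`/`PROVIDER_NAME` constants live in `NewDouban.py` and are not shown here:

```python
import dataclasses

from cps.services.Metadata import MetaRecord, MetaSourceInfo   # or the local test copy

# Placeholder values only; parse_book() fills these from the scraped Douban page.
book = MetaRecord(
    id="12345678",
    title="Example Title",
    authors=["Example Author"],
    url="https://book.douban.com/subject/12345678/",
    source=MetaSourceInfo(id="provider-id", description="provider name", link="https://book.douban.com/"),
)
record_dict = dataclasses.asdict(book)       # nested MetaSourceInfo becomes a plain dict too
print(record_dict["title"], record_dict["source"]["link"])
```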
@@ -17,11 +17,50 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 # Copied from calibre-web so the provider can be tested standalone
-class Metadata():
+import abc
+import dataclasses
+import os
+from typing import Dict, List, Optional, Union
+
+
+@dataclasses.dataclass
+class MetaSourceInfo:
+    id: str
+    description: str
+    link: str
+
+
+@dataclasses.dataclass
+class MetaRecord:
+    id: Union[str, int]
+    title: str
+    authors: List[str]
+    url: str
+    source: MetaSourceInfo
+    cover: str = os.path.join("", 'generic_cover.jpg')
+    description: Optional[str] = ""
+    series: Optional[str] = None
+    series_index: Optional[Union[int, float]] = 0
+    identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
+    publisher: Optional[str] = None
+    publishedDate: Optional[str] = None
+    rating: Optional[int] = 0
+    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
+    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
+
+
+class Metadata:
     __name__ = "Generic"
     __id__ = "generic"
 
     def __init__(self):
         self.active = True
 
     def set_status(self, state):
         self.active = state
+
+    @abc.abstractmethod
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "cn"
+    ) -> Optional[List[MetaRecord]]:
+        pass
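This helper exists so `NewDouban.py` can be exercised without a calibre-web installation. One possible way a standalone test could wire that up is to register this local copy under the `cps.services.Metadata` name before importing the provider; this is a hypothetical bootstrap, not necessarily how the repository runs its tests, and it assumes this file is saved as `Metadata.py` next to `NewDouban.py` on `sys.path`:

```python
import sys
import types

import Metadata as local_metadata   # this test copy of calibre-web's Metadata module

# Register fake package entries so the provider's import line resolves locally.
sys.modules["cps"] = types.ModuleType("cps")
sys.modules["cps.services"] = types.ModuleType("cps.services")
sys.modules["cps.services.Metadata"] = local_metadata

import NewDouban   # "from cps.services.Metadata import ..." now finds the local copy
```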
@@ -1,42 +0,0 @@
-import requests
-
-from cps.services.Metadata import Metadata
-
-
-class Douban(Metadata):
-    __name__ = "Douban Books"
-    __id__ = "douban"
-    doubanUrl = "http://YOUR_NAS_IP:8085"
-    headers = {
-        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36'
-    }
-
-    def search(self, query, generic_cover=""):
-        if self.active:
-            val = list()
-            result = requests.get(self.doubanUrl + "/v2/book/search?q=" + query.replace(" ", "+"), headers=self.headers)
-            for r in result.json()['books']:
-                v = dict()
-                v['id'] = r['id']
-                v['title'] = r['title']
-                v['authors'] = r.get('authors', [])
-                v['description'] = r.get('summary', "")
-                v['publisher'] = r.get('publisher', "")
-                v['publishedDate'] = r.get('pubdate', "")
-                v['tags'] = [tag.get('name', '') for tag in r.get('tags', [])]
-                rating = r['rating'].get('average', '0')
-                if not rating:
-                    rating = '0'
-                v['rating'] = float(rating) / 2
-                if r.get('image'):
-                    v['cover'] = r.get('image')
-                else:
-                    v['cover'] = generic_cover
-                v['source'] = {
-                    "id": self.__id__,
-                    "description": self.__name__,
-                    "link": "https://book.douban.com/"
-                }
-                v['url'] = "https://book.douban.com/subject/" + r['id']
-                val.append(v)
-            return val
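One behaviour worth remembering from the removed provider: Douban reports averages on a 10-point scale, which the code halved (with a `'0'` fallback for unrated books) to fit the 5-point rating calibre-web expects. For example:

```python
# The same normalisation the removed code applied to the JSON field rating.average.
rating = "8.8"             # hypothetical Douban average
print(float(rating) / 2)   # 4.4 on the 5-point scale
```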
@@ -1,7 +0,0 @@
-from douban import Douban
-
-if __name__ == "__main__":
-    douban = Douban()
-    result = douban.search('人民的名义')
-    for book in result:
-        print(book.get('title'), book.get('url'))
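With the old `src/douban.py` provider and its driver script removed, an equivalent smoke test against `NewDouban` might look like the sketch below. It is hypothetical: it assumes something like the import bootstrap shown after the Metadata.py hunk, network access to douban.com, and it prints `MetaRecord` attributes instead of dict keys:

```python
# Hypothetical replacement for the removed driver script.
from NewDouban import NewDouban

if __name__ == "__main__":
    douban = NewDouban()
    result = douban.search('人民的名义')
    for book in result:
        print(book.title, book.url)
```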