parent f22df1ba57
commit edf7926322
@@ -4,7 +4,7 @@
 This project adds a Douban API provider implementation; it must be placed in the metadata_provider directory.

-### Usage (new)
+### Usage

 Copy `src/NewDouban.py` into the `calibre-web/cps/metadata_provider/` directory and restart the project.

@@ -12,11 +12,9 @@

 Reference documentation: https://fugary.com/?p=238

-### Usage (deprecated)
+**For use with new calibre-web versions 0.6.17 and above.**

-~~After changing the `doubanUrl` address in `src/douban.py`, copy `src/douban.py` into the `calibre-web/cps/metadata_provider/` directory and restart the project.~~
+For versions below 0.6.17, please download: https://github.com/fugary/calibre-web-douban-api/releases/tag/0.6.16

-~~Since the Douban API is no longer publicly available, this Douban API provider had to be used together with `simple-boot-douban-api`.~~

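The usage above amounts to copying one file into the calibre-web tree. As an optional illustration, a small Python sketch of that step; the calibre-web install path is an assumption, adjust it to your setup:

```python
# Optional helper illustrating the install step described in the README above.
# CALIBRE_WEB_DIR is an assumed example location, not part of the commit.
import shutil
from pathlib import Path

CALIBRE_WEB_DIR = Path("/opt/calibre-web")
target = CALIBRE_WEB_DIR / "cps" / "metadata_provider" / "NewDouban.py"
shutil.copy(Path("src/NewDouban.py"), target)
print(f"Copied provider to {target}; restart calibre-web to load it.")
```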
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote
 from lxml import etree
 from functools import lru_cache

-from cps.services.Metadata import Metadata
+from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord

 DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"
 DOUBAN_BOOK_CAT = "1001"
@@ -28,7 +28,7 @@ class NewDouban(Metadata):
         self.searcher = DoubanBookSearcher()
         super().__init__()

-    def search(self, query, generic_cover=""):
+    def search(self, query: str, generic_cover: str = "", locale: str = "en"):
         if self.active:
             return self.searcher.search_books(query)
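For context, a minimal sketch of how the updated interface is exercised: newer calibre-web versions pass an extra `locale` argument to every provider, which the new signature accepts while still delegating to the internal searcher. The import path below is an assumption for illustration, not part of the commit:

```python
# Hypothetical usage sketch of the updated search() signature.
# The module path is assumed from the install location cps/metadata_provider/NewDouban.py.
from cps.metadata_provider.NewDouban import NewDouban

provider = NewDouban()
records = provider.search('人民的名义', generic_cover="", locale="zh")
for record in records or []:
    # MetaRecord results expose attributes instead of dict keys.
    print(record.id, record.title, record.url)
```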
@@ -94,55 +94,58 @@ class DoubanBookHtmlParser:
         self.id_pattern = re.compile(".*/subject/(\\d+)/?")

     def parse_book(self, url, book_content):
-        book = {}
+        book = MetaRecord(
+            id="",
+            title="",
+            authors=[],
+            publisher="",
+            description="",
+            url="",
+            source=MetaSourceInfo(
+                id=PROVIDER_ID,
+                description=PROVIDER_NAME,
+                link="https://book.douban.com/"
+            )
+        )
         html = etree.HTML(book_content)
         title_element = html.xpath("//span[@property='v:itemreviewed']")
-        book['title'] = self.get_text(title_element)
+        book.title = self.get_text(title_element)
         share_element = html.xpath("//a[@data-url]")
         if len(share_element):
             url = share_element[0].attrib['data-url']
-        book['url'] = url
+        book.url = url
         id_match = self.id_pattern.match(url)
         if id_match:
-            book['id'] = id_match.group(1)
+            book.id = id_match.group(1)
         img_element = html.xpath("//a[@class='nbg']")
         if len(img_element):
             cover = img_element[0].attrib['href']
             if not cover or cover.endswith('update_image'):
-                book['cover'] = ''
+                book.cover = ''
             else:
-                book['cover'] = cover
+                book.cover = cover
         rating_element = html.xpath("//strong[@property='v:average']")
-        book['rating'] = self.get_rating(rating_element)
+        book.rating = self.get_rating(rating_element)
         elements = html.xpath("//span[@class='pl']")
-        book['authors'] = []
-        book['publisher'] = ''
         for element in elements:
             text = self.get_text(element)
-            if text.startswith("作者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
-            elif text.startswith("译者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
+            if text.startswith("作者") or text.startswith("译者"):
+                book.authors.extend([self.get_text(author_element) for author_element in
+                                     filter(self.author_filter, element.findall("..//a"))])
             elif text.startswith("出版社"):
-                book['publisher'] = self.get_tail(element)
+                book.publisher = self.get_tail(element)
             elif text.startswith("副标题"):
-                book['title'] = book['title'] + ':' + self.get_tail(element)
+                book.title = book.title + ':' + self.get_tail(element)
             elif text.startswith("出版年"):
-                book['publishedDate'] = self.get_tail(element)
+                book.publishedDate = self.get_tail(element)
             elif text.startswith("丛书"):
-                book['series'] = self.get_text(element.getnext())
+                book.series = self.get_text(element.getnext())
         summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']")
-        book['description'] = ''
         if len(summary_element):
-            book['description'] = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
+            book.description = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
         tag_elements = html.xpath("//a[contains(@class, 'tag')]")
         if len(tag_elements):
-            book['tags'] = [self.get_text(tag_element) for tag_element in tag_elements]
-        book['source'] = {
-            "id": PROVIDER_ID,
-            "description": PROVIDER_NAME,
-            "link": "https://book.douban.com/"
-        }
+            book.tags = [self.get_text(tag_element) for tag_element in tag_elements]
         return book

     def get_rating(self, rating_element):
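As a sanity check on the new return type, here is a rough standalone sketch (not part of the commit) that feeds a trimmed, fake Douban subject page into the parser; the no-argument constructor call and the HTML snippet are assumptions made for illustration:

```python
# Illustrative only: a fake, trimmed subject page run through the parser.
# Assumes DoubanBookHtmlParser() takes no constructor arguments, as the diff suggests.
fake_html = """
<html><body>
  <span property="v:itemreviewed">Example Book</span>
  <a data-url="https://book.douban.com/subject/12345678/">share</a>
  <a class="nbg" href="https://example.org/cover.jpg">cover</a>
  <strong property="v:average">8.6</strong>
</body></html>
"""
parser = DoubanBookHtmlParser()
book = parser.parse_book("https://book.douban.com/subject/12345678/", fake_html)
# Attribute access replaces the old dict lookups (book['title'], book['id'], ...).
print(book.id, book.title, book.cover)
```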
@@ -17,11 +17,50 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 # Copied from calibre-web to make local testing easier
-class Metadata():
+import abc
+import dataclasses
+import os
+from typing import Dict, List, Optional, Union
+
+
+@dataclasses.dataclass
+class MetaSourceInfo:
+    id: str
+    description: str
+    link: str
+
+
+@dataclasses.dataclass
+class MetaRecord:
+    id: Union[str, int]
+    title: str
+    authors: List[str]
+    url: str
+    source: MetaSourceInfo
+    cover: str = os.path.join("", 'generic_cover.jpg')
+    description: Optional[str] = ""
+    series: Optional[str] = None
+    series_index: Optional[Union[int, float]] = 0
+    identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
+    publisher: Optional[str] = None
+    publishedDate: Optional[str] = None
+    rating: Optional[int] = 0
+    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
+    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
+
+
+class Metadata:
     __name__ = "Generic"
+    __id__ = "generic"

     def __init__(self):
         self.active = True

     def set_status(self, state):
         self.active = state
+
+    @abc.abstractmethod
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "cn"
+    ) -> Optional[List[MetaRecord]]:
+        pass
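To show how these additions fit together, a minimal provider sketch. Only the `Metadata`, `MetaRecord` and `MetaSourceInfo` definitions come from the diff above; `DummyProvider` and its return values are hypothetical:

```python
# Hypothetical example provider built on the classes added above.
from typing import List, Optional


class DummyProvider(Metadata):
    __name__ = "Dummy"
    __id__ = "dummy"

    def search(self, query: str, generic_cover: str = "",
               locale: str = "cn") -> Optional[List[MetaRecord]]:
        if not self.active:
            return None
        # Only id, title, authors, url and source are required; the rest have defaults.
        return [MetaRecord(
            id="1",
            title=query,
            authors=["Example Author"],
            url="https://book.douban.com/subject/1/",
            source=MetaSourceInfo(id=self.__id__, description=self.__name__,
                                  link="https://book.douban.com/"),
        )]
```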
@@ -1,42 +0,0 @@
-import requests
-
-from cps.services.Metadata import Metadata
-
-
-class Douban(Metadata):
-    __name__ = "Douban Books"
-    __id__ = "douban"
-    doubanUrl = "http://YOUR_NAS_IP:8085"
-    headers = {
-        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36'
-    }
-
-    def search(self, query, generic_cover=""):
-        if self.active:
-            val = list()
-            result = requests.get(self.doubanUrl + "/v2/book/search?q=" + query.replace(" ", "+"), headers=self.headers)
-            for r in result.json()['books']:
-                v = dict()
-                v['id'] = r['id']
-                v['title'] = r['title']
-                v['authors'] = r.get('authors', [])
-                v['description'] = r.get('summary', "")
-                v['publisher'] = r.get('publisher', "")
-                v['publishedDate'] = r.get('pubdate', "")
-                v['tags'] = [tag.get('name', '') for tag in r.get('tags', [])]
-                rating = r['rating'].get('average', '0')
-                if not rating:
-                    rating = '0'
-                v['rating'] = float(rating) / 2
-                if r.get('image'):
-                    v['cover'] = r.get('image')
-                else:
-                    v['cover'] = generic_cover
-                v['source'] = {
-                    "id": self.__id__,
-                    "description": self.__name__,
-                    "link": "https://book.douban.com/"
-                }
-                v['url'] = "https://book.douban.com/subject/" + r['id']
-                val.append(v)
-            return val
@@ -1,7 +0,0 @@
-from douban import Douban
-
-if __name__ == "__main__":
-    douban = Douban()
-    result = douban.search('人民的名义')
-    for book in result:
-        print(book.get('title'), book.get('url'))
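The deleted script exercised the old dict-based provider. An equivalent smoke test against the new provider would use attribute access on `MetaRecord`; a sketch, with the import path assumed rather than taken from the commit:

```python
# Hypothetical replacement smoke test for the new provider.
# Assumes src/NewDouban.py is importable from the working directory.
from NewDouban import NewDouban

if __name__ == "__main__":
    douban = NewDouban()
    result = douban.search('人民的名义')
    for book in result:
        # MetaRecord attributes replace the old dict.get() lookups.
        print(book.title, book.url)
```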