处理calibre-web的0.6.17元数据API升级

移除不用的老文件
This commit is contained in:
gary.fu 2022-03-11 20:16:15 +08:00
parent f22df1ba57
commit edf7926322
5 changed files with 73 additions and 82 deletions

View File

@ -4,7 +4,7 @@
此项目是添加一个豆瓣api provider实现，需要放到metadata_provider目录下 此项目是添加一个豆瓣api provider实现，需要放到metadata_provider目录下
### 使用方法(新) ### 使用方法
复制`src/NewDouban.py`到`calibre-web/cps/metadata_provider/`目录下,重启项目即可。 复制`src/NewDouban.py`到`calibre-web/cps/metadata_provider/`目录下,重启项目即可。
@ -12,11 +12,9 @@
参考文档：https://fugary.com/?p=238 参考文档：https://fugary.com/?p=238
### 使用方法(已废弃) **新版calibre-web 0.6.17以上使用**
~~修改`src/douban.py`中的`doubanUrl`地址后,复制`src/douban.py`到`calibre-web/cps/metadata_provider/`目录下,重启项目。~~ 小于0.6.17版本请下载https://github.com/fugary/calibre-web-douban-api/releases/tag/0.6.16
~~由于豆瓣api已经不开放使用了这个豆瓣api需要连接`simple-boot-douban-api`使用~~

View File

@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote
from lxml import etree from lxml import etree
from functools import lru_cache from functools import lru_cache
from cps.services.Metadata import Metadata from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search" DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"
DOUBAN_BOOK_CAT = "1001" DOUBAN_BOOK_CAT = "1001"
@ -28,7 +28,7 @@ class NewDouban(Metadata):
self.searcher = DoubanBookSearcher() self.searcher = DoubanBookSearcher()
super().__init__() super().__init__()
def search(self, query, generic_cover=""): def search(self, query: str, generic_cover: str = "", locale: str = "en"):
if self.active: if self.active:
return self.searcher.search_books(query) return self.searcher.search_books(query)
@ -94,55 +94,58 @@ class DoubanBookHtmlParser:
self.id_pattern = re.compile(".*/subject/(\\d+)/?") self.id_pattern = re.compile(".*/subject/(\\d+)/?")
def parse_book(self, url, book_content): def parse_book(self, url, book_content):
book = {} book = MetaRecord(
id="",
title="",
authors=[],
publisher="",
description="",
url="",
source=MetaSourceInfo(
id=PROVIDER_ID,
description=PROVIDER_NAME,
link="https://book.douban.com/"
)
)
html = etree.HTML(book_content) html = etree.HTML(book_content)
title_element = html.xpath("//span[@property='v:itemreviewed']") title_element = html.xpath("//span[@property='v:itemreviewed']")
book['title'] = self.get_text(title_element) book.title = self.get_text(title_element)
share_element = html.xpath("//a[@data-url]") share_element = html.xpath("//a[@data-url]")
if len(share_element): if len(share_element):
url = share_element[0].attrib['data-url'] url = share_element[0].attrib['data-url']
book['url'] = url book.url = url
id_match = self.id_pattern.match(url) id_match = self.id_pattern.match(url)
if id_match: if id_match:
book['id'] = id_match.group(1) book.id = id_match.group(1)
img_element = html.xpath("//a[@class='nbg']") img_element = html.xpath("//a[@class='nbg']")
if len(img_element): if len(img_element):
cover = img_element[0].attrib['href'] cover = img_element[0].attrib['href']
if not cover or cover.endswith('update_image'): if not cover or cover.endswith('update_image'):
book['cover'] = '' book.cover = ''
else: else:
book['cover'] = cover book.cover = cover
rating_element = html.xpath("//strong[@property='v:average']") rating_element = html.xpath("//strong[@property='v:average']")
book['rating'] = self.get_rating(rating_element) book.rating = self.get_rating(rating_element)
elements = html.xpath("//span[@class='pl']") elements = html.xpath("//span[@class='pl']")
book['authors'] = []
book['publisher'] = ''
for element in elements: for element in elements:
text = self.get_text(element) text = self.get_text(element)
if text.startswith("作者"): if text.startswith("作者") or text.startswith("译者"):
book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))]) book.authors.extend([self.get_text(author_element) for author_element in
elif text.startswith("译者"): filter(self.author_filter, element.findall("..//a"))])
book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
elif text.startswith("出版社"): elif text.startswith("出版社"):
book['publisher'] = self.get_tail(element) book.publisher = self.get_tail(element)
elif text.startswith("副标题"): elif text.startswith("副标题"):
book['title'] = book['title'] + ':' + self.get_tail(element) book.title = book.title + ':' + self.get_tail(element)
elif text.startswith("出版年"): elif text.startswith("出版年"):
book['publishedDate'] = self.get_tail(element) book.publishedDate = self.get_tail(element)
elif text.startswith("丛书"): elif text.startswith("丛书"):
book['series'] = self.get_text(element.getnext()) book.series = self.get_text(element.getnext())
summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']") summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']")
book['description'] = ''
if len(summary_element): if len(summary_element):
book['description'] = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip() book.description = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
tag_elements = html.xpath("//a[contains(@class, 'tag')]") tag_elements = html.xpath("//a[contains(@class, 'tag')]")
if len(tag_elements): if len(tag_elements):
book['tags'] = [self.get_text(tag_element) for tag_element in tag_elements] book.tags = [self.get_text(tag_element) for tag_element in tag_elements]
book['source'] = {
"id": PROVIDER_ID,
"description": PROVIDER_NAME,
"link": "https://book.douban.com/"
}
return book return book
def get_rating(self, rating_element): def get_rating(self, rating_element):

View File

@ -17,11 +17,50 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# 从calibre-web复制出来方便测试使用 # 从calibre-web复制出来方便测试使用
class Metadata(): import abc
import dataclasses
import os
from typing import Dict, List, Optional, Union
@dataclasses.dataclass
class MetaSourceInfo:
    """Identify the metadata provider that produced a record."""

    # Provider identifier.
    id: str
    # Human-readable provider name.
    description: str
    # Address of the provider's web site.
    link: str
@dataclasses.dataclass
class MetaRecord:
    """Metadata for a single book, as returned by a provider search."""

    # --- Required fields: these have no default and must come first. ---
    id: Union[str, int]
    title: str
    authors: List[str]
    url: str
    # Provider that produced this record.
    source: MetaSourceInfo

    # --- Optional fields. ---
    # Falls back to the generic cover file name when no cover is supplied.
    cover: str = os.path.join("", 'generic_cover.jpg')
    description: Optional[str] = ""
    series: Optional[str] = None
    series_index: Optional[Union[int, float]] = 0
    # default_factory gives every record its own dict/list instead of a
    # container shared between instances.
    identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
    publisher: Optional[str] = None
    publishedDate: Optional[str] = None
    rating: Optional[int] = 0
    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
class Metadata:
__name__ = "Generic" __name__ = "Generic"
__id__ = "generic"
def __init__(self): def __init__(self):
self.active = True self.active = True
def set_status(self, state): def set_status(self, state):
self.active = state self.active = state
@abc.abstractmethod
def search(
self, query: str, generic_cover: str = "", locale: str = "cn"
) -> Optional[List[MetaRecord]]:
pass

View File

@ -1,42 +0,0 @@
import requests
from cps.services.Metadata import Metadata
class Douban(Metadata):
    """Metadata provider backed by an HTTP book-search service.

    The service is reached at ``doubanUrl``; the placeholder value must be
    replaced with the real base URL before the provider can return results.
    """

    __name__ = "Douban Books"
    __id__ = "douban"
    doubanUrl = "http://YOUR_NAS_IP:8085"
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36'
    }
    # Seconds to wait for the service; without a timeout an unreachable
    # server would block the calling thread indefinitely.
    timeout = 10

    def search(self, query, generic_cover=""):
        """Search the service for books matching ``query``.

        Args:
            query: Free-text search string.
            generic_cover: Cover value used for results without an image.

        Returns:
            A list with one dict per book (keys: id, title, authors,
            description, publisher, publishedDate, tags, rating, cover,
            source, url). Empty when the provider is inactive.

        Raises:
            requests.RequestException: If the request fails or times out.
            KeyError: If the response has no ``books`` entry or a book lacks
                ``id`` / ``title``.
        """
        if not self.active:
            return []
        # Pass the query through ``params`` so requests URL-encodes it;
        # characters such as '&' or '#' would otherwise corrupt the URL.
        # Spaces are still sent as '+', as before.
        response = requests.get(
            self.doubanUrl + "/v2/book/search",
            params={"q": query},
            headers=self.headers,
            timeout=self.timeout,
        )
        return [
            self._to_record(book, generic_cover)
            for book in response.json()['books']
        ]

    def _to_record(self, r, generic_cover):
        """Convert one book entry of the response into a result dict."""
        # 'rating' or its 'average' may be absent or empty; treat both as 0.
        rating = (r.get('rating') or {}).get('average') or '0'
        return {
            'id': r['id'],
            'title': r['title'],
            'authors': r.get('authors', []),
            'description': r.get('summary', ""),
            'publisher': r.get('publisher', ""),
            'publishedDate': r.get('pubdate', ""),
            'tags': [tag.get('name', '') for tag in r.get('tags', [])],
            # NOTE(review): halving presumably maps a 10-point score onto a
            # 5-point scale -- confirm against the service.
            'rating': float(rating) / 2,
            'cover': r.get('image') or generic_cover,
            'source': {
                "id": self.__id__,
                "description": self.__name__,
                "link": "https://book.douban.com/"
            },
            # str() keeps the concatenation valid if the id arrives as a number.
            'url': "https://book.douban.com/subject/" + str(r['id']),
        }

View File

@ -1,7 +0,0 @@
from douban import Douban
if __name__ == "__main__":
    # Manual smoke test: run one search and print each hit's title and URL.
    provider = Douban()
    for entry in provider.search('人民的名义'):
        print(entry.get('title'), entry.get('url'))