diff --git a/README.md b/README.md index 52f692d..4033e67 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ 此项目是添加一个豆瓣api provider实现,需要放到metadata_provider目录下 -### 使用方法(新) +### 使用方法 复制`src/NewDouban.py`到`calibre-web/cps/metadata_provider/`目录下,重启项目即可。 @@ -12,11 +12,9 @@ 参考文档:https://fugary.com/?p=238 -### 使用方法(已废弃) +**新版calibre-web 0.6.17以上使用** -~~修改`src/douban.py`中的`doubanUrl`地址后,复制`src/douban.py`到`calibre-web/cps/metadata_provider/`目录下,重启项目。~~ - -~~由于豆瓣api已经不开放使用了,这个豆瓣api需要连接`simple-boot-douban-api`使用~~ +小于0.6.17版本,请下载:https://github.com/fugary/calibre-web-douban-api/releases/tag/0.6.16 diff --git a/src/NewDouban.py b/src/NewDouban.py index bc04fd6..e401e97 100644 --- a/src/NewDouban.py +++ b/src/NewDouban.py @@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote from lxml import etree from functools import lru_cache -from cps.services.Metadata import Metadata +from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search" DOUBAN_BOOK_CAT = "1001" @@ -28,7 +28,7 @@ class NewDouban(Metadata): self.searcher = DoubanBookSearcher() super().__init__() - def search(self, query, generic_cover=""): + def search(self, query: str, generic_cover: str = "", locale: str = "en"): if self.active: return self.searcher.search_books(query) @@ -94,55 +94,58 @@ class DoubanBookHtmlParser: self.id_pattern = re.compile(".*/subject/(\\d+)/?") def parse_book(self, url, book_content): - book = {} + book = MetaRecord( + id="", + title="", + authors=[], + publisher="", + description="", + url="", + source=MetaSourceInfo( + id=PROVIDER_ID, + description=PROVIDER_NAME, + link="https://book.douban.com/" + ) + ) html = etree.HTML(book_content) title_element = html.xpath("//span[@property='v:itemreviewed']") - book['title'] = self.get_text(title_element) + book.title = self.get_text(title_element) share_element = html.xpath("//a[@data-url]") if len(share_element): url = share_element[0].attrib['data-url'] - book['url'] = 
url + book.url = url id_match = self.id_pattern.match(url) if id_match: - book['id'] = id_match.group(1) + book.id = id_match.group(1) img_element = html.xpath("//a[@class='nbg']") if len(img_element): cover = img_element[0].attrib['href'] if not cover or cover.endswith('update_image'): - book['cover'] = '' + book.cover = '' else: - book['cover'] = cover + book.cover = cover rating_element = html.xpath("//strong[@property='v:average']") - book['rating'] = self.get_rating(rating_element) + book.rating = self.get_rating(rating_element) elements = html.xpath("//span[@class='pl']") - book['authors'] = [] - book['publisher'] = '' for element in elements: text = self.get_text(element) - if text.startswith("作者"): - book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))]) - elif text.startswith("译者"): - book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))]) + if text.startswith("作者") or text.startswith("译者"): + book.authors.extend([self.get_text(author_element) for author_element in + filter(self.author_filter, element.findall("..//a"))]) elif text.startswith("出版社"): - book['publisher'] = self.get_tail(element) + book.publisher = self.get_tail(element) elif text.startswith("副标题"): - book['title'] = book['title'] + ':' + self.get_tail(element) + book.title = book.title + ':' + self.get_tail(element) elif text.startswith("出版年"): - book['publishedDate'] = self.get_tail(element) + book.publishedDate = self.get_tail(element) elif text.startswith("丛书"): - book['series'] = self.get_text(element.getnext()) + book.series = self.get_text(element.getnext()) summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']") - book['description'] = '' if len(summary_element): - book['description'] = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip() + book.description = 
etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip() tag_elements = html.xpath("//a[contains(@class, 'tag')]") if len(tag_elements): - book['tags'] = [self.get_text(tag_element) for tag_element in tag_elements] - book['source'] = { - "id": PROVIDER_ID, - "description": PROVIDER_NAME, - "link": "https://book.douban.com/" - } + book.tags = [self.get_text(tag_element) for tag_element in tag_elements] return book def get_rating(self, rating_element): diff --git a/src/cps/services/Metadata.py b/src/cps/services/Metadata.py index 4456cfa..d9d78b4 100644 --- a/src/cps/services/Metadata.py +++ b/src/cps/services/Metadata.py @@ -17,11 +17,50 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # 从calibre-web复制出来,方便测试使用 -class Metadata(): +import abc +import dataclasses +import os +from typing import Dict, List, Optional, Union + + +@dataclasses.dataclass +class MetaSourceInfo: + id: str + description: str + link: str + + +@dataclasses.dataclass +class MetaRecord: + id: Union[str, int] + title: str + authors: List[str] + url: str + source: MetaSourceInfo + cover: str = os.path.join("", 'generic_cover.jpg') + description: Optional[str] = "" + series: Optional[str] = None + series_index: Optional[Union[int, float]] = 0 + identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict) + publisher: Optional[str] = None + publishedDate: Optional[str] = None + rating: Optional[int] = 0 + languages: Optional[List[str]] = dataclasses.field(default_factory=list) + tags: Optional[List[str]] = dataclasses.field(default_factory=list) + + +class Metadata: __name__ = "Generic" + __id__ = "generic" def __init__(self): self.active = True def set_status(self, state): self.active = state + + @abc.abstractmethod + def search( + self, query: str, generic_cover: str = "", locale: str = "cn" + ) -> Optional[List[MetaRecord]]: + pass diff --git a/src/douban.py b/src/douban.py deleted file mode 100644 index d038ffe..0000000 --- a/src/douban.py +++ /dev/null @@ -1,42
+0,0 @@ -import requests - -from cps.services.Metadata import Metadata - - -class Douban(Metadata): - __name__ = "Douban Books" - __id__ = "douban" - doubanUrl = "http://YOUR_NAS_IP:8085" - headers = { - 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36' - } - - def search(self, query, generic_cover=""): - if self.active: - val = list() - result = requests.get(self.doubanUrl + "/v2/book/search?q=" + query.replace(" ", "+"), headers=self.headers) - for r in result.json()['books']: - v = dict() - v['id'] = r['id'] - v['title'] = r['title'] - v['authors'] = r.get('authors', []) - v['description'] = r.get('summary', "") - v['publisher'] = r.get('publisher', "") - v['publishedDate'] = r.get('pubdate', "") - v['tags'] = [tag.get('name', '') for tag in r.get('tags', [])] - rating = r['rating'].get('average', '0') - if not rating: - rating = '0' - v['rating'] = float(rating) / 2 - if r.get('image'): - v['cover'] = r.get('image') - else: - v['cover'] = generic_cover - v['source'] = { - "id": self.__id__, - "description": self.__name__, - "link": "https://book.douban.com/" - } - v['url'] = "https://book.douban.com/subject/" + r['id'] - val.append(v) - return val diff --git a/tests/DoubanTest.py b/tests/DoubanTest.py deleted file mode 100644 index ea6b102..0000000 --- a/tests/DoubanTest.py +++ /dev/null @@ -1,7 +0,0 @@ -from douban import Douban - -if __name__ == "__main__": - douban = Douban() - result = douban.search('人民的名义') - for book in result: - print(book.get('title'), book.get('url'))