处理calibre-web的0.6.17元数据API升级

移除不用的老文件
2022-03-11 20:16:15 +08:00 · 2022-03-11 20:16:15 +08:00 · edf7926322
parent f22df1ba57
commit edf7926322
5 changed files with 73 additions and 82 deletions
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@
 此项目是添加一个豆瓣api provider实现，需要放到metadata_provider目录下
-### 使用方法（新）
+### 使用方法
 复制`src/NewDouban.py`到`calibre-web/cps/metadata_provider/`目录下，重启项目即可。
@ -12,11 +12,9 @@
 参考文档：https://fugary.com/?p=238
-### 使用方法（已废弃）
+**新版calibre-web 0.6.17以上使用**
-~~修改`src/douban.py`中的`doubanUrl`地址后，复制`src/douban.py`到`calibre-web/cps/metadata_provider/`目录下，重启项目。~~
+小于0.6.17版本，请下载：https://github.com/fugary/calibre-web-douban-api/releases/tag/0.6.16
 ~~由于豆瓣api已经不开放使用了，这个豆瓣api需要连接`simple-boot-douban-api`使用~~
--- a/src/NewDouban.py
+++ b/src/NewDouban.py
@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote
 from lxml import etree
 from functools import lru_cache
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
 DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"
 DOUBAN_BOOK_CAT = "1001"
@ -28,7 +28,7 @@ class NewDouban(Metadata):
        self.searcher = DoubanBookSearcher()
        super().__init__()
-    def search(self, query, generic_cover=""):
+    def search(self, query: str, generic_cover: str = "", locale: str = "en"):
        if self.active:
            return self.searcher.search_books(query)
@ -94,55 +94,58 @@ class DoubanBookHtmlParser:
        self.id_pattern = re.compile(".*/subject/(\\d+)/?")
    def parse_book(self, url, book_content):
-        book = {}
+        book = MetaRecord(
            id="",
            title="",
            authors=[],
            publisher="",
            description="",
            url="",
            source=MetaSourceInfo(
                id=PROVIDER_ID,
                description=PROVIDER_NAME,
                link="https://book.douban.com/"
            )
        )
        html = etree.HTML(book_content)
        title_element = html.xpath("//span[@property='v:itemreviewed']")
-        book['title'] = self.get_text(title_element)
+        book.title = self.get_text(title_element)
        share_element = html.xpath("//a[@data-url]")
        if len(share_element):
            url = share_element[0].attrib['data-url']
-        book['url'] = url
+        book.url = url
        id_match = self.id_pattern.match(url)
        if id_match:
-            book['id'] = id_match.group(1)
+            book.id = id_match.group(1)
        img_element = html.xpath("//a[@class='nbg']")
        if len(img_element):
            cover = img_element[0].attrib['href']
            if not cover or cover.endswith('update_image'):
-                book['cover'] = ''
+                book.cover = ''
            else:
-                book['cover'] = cover
+                book.cover = cover
        rating_element = html.xpath("//strong[@property='v:average']")
-        book['rating'] = self.get_rating(rating_element)
+        book.rating = self.get_rating(rating_element)
        elements = html.xpath("//span[@class='pl']")
        book['authors'] = []
        book['publisher'] = ''
        for element in elements:
            text = self.get_text(element)
-            if text.startswith("作者"):
+            if text.startswith("作者") or text.startswith("译者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
+                book.authors.extend([self.get_text(author_element) for author_element in
-            elif text.startswith("译者"):
+                                     filter(self.author_filter, element.findall("..//a"))])
                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
            elif text.startswith("出版社"):
-                book['publisher'] = self.get_tail(element)
+                book.publisher = self.get_tail(element)
            elif text.startswith("副标题"):
-                book['title'] = book['title'] + ':' + self.get_tail(element)
+                book.title = book.title + ':' + self.get_tail(element)
            elif text.startswith("出版年"):
-                book['publishedDate'] = self.get_tail(element)
+                book.publishedDate = self.get_tail(element)
            elif text.startswith("丛书"):
-                book['series'] = self.get_text(element.getnext())
+                book.series = self.get_text(element.getnext())
        summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']")
        book['description'] = ''
        if len(summary_element):
-            book['description'] = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
+            book.description = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
        tag_elements = html.xpath("//a[contains(@class, 'tag')]")
        if len(tag_elements):
-            book['tags'] = [self.get_text(tag_element) for tag_element in tag_elements]
+            book.tags = [self.get_text(tag_element) for tag_element in tag_elements]
        book['source'] = {
            "id": PROVIDER_ID,
            "description": PROVIDER_NAME,
            "link": "https://book.douban.com/"
        }
        return book
    def get_rating(self, rating_element):
--- a/src/cps/services/Metadata.py
+++ b/src/cps/services/Metadata.py
@ -17,11 +17,50 @@
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 # 从calibre-web复制出来，方便测试使用
-class Metadata():
+import abc
 import dataclasses
 import os
 from typing import Dict, List, Optional, Union
# Identifies the metadata provider that produced a record.
@dataclasses.dataclass
 class MetaSourceInfo:
    # Provider identifier string.
    id: str
    # Human-readable provider name.
    description: str
    # URL associated with the provider (a provider may pass its site URL).
    link: str
# One book's metadata; Metadata.search() is annotated to return a list of these.
# id, title, authors, url and source are required; every other field has a default.
@dataclasses.dataclass
 class MetaRecord:
    id: Union[str, int]
    title: str
    authors: List[str]
    url: str
    source: MetaSourceInfo
    # os.path.join("", ...) evaluates to the bare file name 'generic_cover.jpg'.
    cover: str = os.path.join("", 'generic_cover.jpg')
    description: Optional[str] = ""
    series: Optional[str] = None
    series_index: Optional[Union[int, float]] = 0
    # default_factory gives each record its own container instead of a shared mutable default.
    identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
    publisher: Optional[str] = None
    publishedDate: Optional[str] = None
    rating: Optional[int] = 0
    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
 # Base class for metadata providers: carries a display name, an id and an
 # on/off flag, and declares the search() entry point for subclasses to override.
 class Metadata:
    __name__ = "Generic"
    __id__ = "generic"
    def __init__(self):
        # Providers start out enabled.
        self.active = True
    def set_status(self, state):
        # Stores `state` as the new value of the `active` flag.
        self.active = state
    # NOTE(review): this class does not derive from abc.ABC (nor use ABCMeta),
    # so the decorator below does not block instantiation of a subclass that
    # fails to override search(); the default body simply returns None.
    @abc.abstractmethod
    def search(
            self, query: str, generic_cover: str = "", locale: str = "cn"
    ) -> Optional[List[MetaRecord]]:
        pass
--- a/src/douban.py
+++ b/src/douban.py
@ -1,42 +0,0 @@
 import requests
 from cps.services.Metadata import Metadata
 # Legacy provider (file removed by this commit): looks books up through an
 # HTTP service at `doubanUrl` and converts each JSON entry into a plain dict.
 class Douban(Metadata):
    __name__ = "Douban Books"
    __id__ = "douban"
    # Placeholder address; has to be edited to point at the real service.
    doubanUrl = "http://YOUR_NAS_IP:8085"
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36'
    }
    # Returns a list of result dicts when the provider is active; when it is
    # inactive the method falls off the end and returns None.
    def search(self, query, generic_cover=""):
        if self.active:
            val = list()
            # NOTE(review): only spaces are rewritten to "+"; the query is not
            # otherwise URL-encoded, and the request has no timeout or status check.
            result = requests.get(self.doubanUrl + "/v2/book/search?q=" + query.replace(" ", "+"), headers=self.headers)
            for r in result.json()['books']:
                v = dict()
                v['id'] = r['id']
                v['title'] = r['title']
                v['authors'] = r.get('authors', [])
                v['description'] = r.get('summary', "")
                v['publisher'] = r.get('publisher', "")
                v['publishedDate'] = r.get('pubdate', "")
                v['tags'] = [tag.get('name', '') for tag in r.get('tags', [])]
                rating = r['rating'].get('average', '0')
                # An empty/falsy average is treated as '0' so float() cannot fail on it.
                if not rating:
                    rating = '0'
                # Halves the reported average — presumably mapping a 10-point
                # scale onto a 5-point one; TODO confirm.
                v['rating'] = float(rating) / 2
                # Fall back to the caller-supplied generic cover when no image is given.
                if r.get('image'):
                    v['cover'] = r.get('image')
                else:
                    v['cover'] = generic_cover
                v['source'] = {
                    "id": self.__id__,
                    "description": self.__name__,
                    "link": "https://book.douban.com/"
                }
                v['url'] = "https://book.douban.com/subject/" + r['id']
                val.append(v)
            return val
--- a/tests/DoubanTest.py
+++ b/tests/DoubanTest.py
@ -1,7 +0,0 @@
 from douban import Douban
 # Manual smoke test: runs one search through the Douban provider and prints
 # the title and URL of every result (results are accessed as dicts via .get).
 if __name__ == "__main__":
    douban = Douban()
    result = douban.search('人民的名义')
    for book in result:
        print(book.get('title'), book.get('url'))