parent f22df1ba57
commit edf7926322
@@ -4,7 +4,7 @@
 This project adds a Douban API provider implementation; it has to be placed in calibre-web's metadata_provider directory.
 
-### Usage (new)
+### Usage
 
 Copy `src/NewDouban.py` into the `calibre-web/cps/metadata_provider/` directory and restart the application.
 
 
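The usage note above works because calibre-web picks up provider modules dropped into `cps/metadata_provider/`, so no further wiring is needed after the copy. For orientation, here is a minimal sketch of the provider shape that directory expects, based on the `Metadata`/`MetaRecord` interface shown later in this diff; the class name and strings are placeholders, not part of the project:

```python
# Illustrative skeleton only -- the real provider in this repo is src/NewDouban.py.
from typing import List, Optional

from cps.services.Metadata import Metadata, MetaRecord


class ExampleProvider(Metadata):      # hypothetical name
    __name__ = "Example Provider"
    __id__ = "exampleprovider"

    def search(self, query: str, generic_cover: str = "", locale: str = "en") -> Optional[List[MetaRecord]]:
        if not self.active:
            return None
        return []                      # a real provider returns MetaRecord objects here
```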
@@ -12,11 +12,9 @@
 Reference documentation: https://fugary.com/?p=238
 
 ### Usage (deprecated)
-**Use with newer calibre-web, 0.6.17 and above**
-
 ~~After changing the `doubanUrl` address in `src/douban.py`, copy `src/douban.py` into the `calibre-web/cps/metadata_provider/` directory and restart the application.~~
 
 ~~Since the Douban API is no longer open for public use, this Douban API provider needs to connect to `simple-boot-douban-api`.~~
-
+For versions below 0.6.17, please download: https://github.com/fugary/calibre-web-douban-api/releases/tag/0.6.16
 
 
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, unquote
 from lxml import etree
 from functools import lru_cache
 
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
 
 DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"
 DOUBAN_BOOK_CAT = "1001"
@@ -28,7 +28,7 @@ class NewDouban(Metadata):
         self.searcher = DoubanBookSearcher()
         super().__init__()
 
-    def search(self, query, generic_cover=""):
+    def search(self, query: str, generic_cover: str = "", locale: str = "en"):
         if self.active:
             return self.searcher.search_books(query)
 
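The widened signature accepts the `locale` keyword that newer calibre-web versions pass to providers; `search_books(query)` itself ignores it. When the provider is switched off, `self.active` is false, the body falls through, and `search` returns `None`, which the abstract base's `Optional[List[MetaRecord]]` return type (later in this diff) allows. A small hedged sketch of that behaviour, assuming `NewDouban` is importable from wherever the module was installed:

```python
# Hypothetical direct use while debugging; calibre-web normally drives this itself.
from NewDouban import NewDouban   # adjust the import path to where the module lives

provider = NewDouban()
provider.set_status(False)                        # simulate the provider being switched off
print(provider.search("anything", locale="en"))   # -> None, because self.active is False
provider.set_status(True)                         # re-enable before real searches
```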
@@ -94,55 +94,58 @@ class DoubanBookHtmlParser:
         self.id_pattern = re.compile(".*/subject/(\\d+)/?")
 
     def parse_book(self, url, book_content):
-        book = {}
+        book = MetaRecord(
+            id="",
+            title="",
+            authors=[],
+            publisher="",
+            description="",
+            url="",
+            source=MetaSourceInfo(
+                id=PROVIDER_ID,
+                description=PROVIDER_NAME,
+                link="https://book.douban.com/"
+            )
+        )
         html = etree.HTML(book_content)
         title_element = html.xpath("//span[@property='v:itemreviewed']")
-        book['title'] = self.get_text(title_element)
+        book.title = self.get_text(title_element)
         share_element = html.xpath("//a[@data-url]")
         if len(share_element):
             url = share_element[0].attrib['data-url']
-        book['url'] = url
+        book.url = url
         id_match = self.id_pattern.match(url)
         if id_match:
-            book['id'] = id_match.group(1)
+            book.id = id_match.group(1)
         img_element = html.xpath("//a[@class='nbg']")
         if len(img_element):
             cover = img_element[0].attrib['href']
             if not cover or cover.endswith('update_image'):
-                book['cover'] = ''
+                book.cover = ''
             else:
-                book['cover'] = cover
+                book.cover = cover
         rating_element = html.xpath("//strong[@property='v:average']")
-        book['rating'] = self.get_rating(rating_element)
+        book.rating = self.get_rating(rating_element)
         elements = html.xpath("//span[@class='pl']")
-        book['authors'] = []
-        book['publisher'] = ''
         for element in elements:
             text = self.get_text(element)
-            if text.startswith("作者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
-            elif text.startswith("译者"):
-                book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
+            if text.startswith("作者") or text.startswith("译者"):
+                book.authors.extend([self.get_text(author_element) for author_element in
+                                     filter(self.author_filter, element.findall("..//a"))])
             elif text.startswith("出版社"):
-                book['publisher'] = self.get_tail(element)
+                book.publisher = self.get_tail(element)
             elif text.startswith("副标题"):
-                book['title'] = book['title'] + ':' + self.get_tail(element)
+                book.title = book.title + ':' + self.get_tail(element)
             elif text.startswith("出版年"):
-                book['publishedDate'] = self.get_tail(element)
+                book.publishedDate = self.get_tail(element)
             elif text.startswith("丛书"):
-                book['series'] = self.get_text(element.getnext())
+                book.series = self.get_text(element.getnext())
         summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']")
-        book['description'] = ''
         if len(summary_element):
-            book['description'] = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
+            book.description = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
         tag_elements = html.xpath("//a[contains(@class, 'tag')]")
         if len(tag_elements):
-            book['tags'] = [self.get_text(tag_element) for tag_element in tag_elements]
-        book['source'] = {
-            "id": PROVIDER_ID,
-            "description": PROVIDER_NAME,
-            "link": "https://book.douban.com/"
-        }
+            book.tags = [self.get_text(tag_element) for tag_element in tag_elements]
         return book
 
     def get_rating(self, rating_element):
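Net effect of this hunk: `parse_book` now fills in a `MetaRecord` dataclass (with the provider details carried by `MetaSourceInfo`) instead of an ad-hoc dict, so callers read `book.title` rather than `book['title']`. If the old dict shape is ever needed again, for logging or quick tests, the dataclass converts back with `dataclasses.asdict`. A minimal sketch with placeholder values, using the dataclass definitions shown in the next hunk; the real `PROVIDER_ID`/`PROVIDER_NAME` constants live in `NewDouban.py` and are not shown here:

```python
import dataclasses

from cps.services.Metadata import MetaRecord, MetaSourceInfo   # or the local test copy

# Placeholder values only; parse_book() fills these from the scraped Douban page.
book = MetaRecord(
    id="12345678",
    title="Example Title",
    authors=["Example Author"],
    url="https://book.douban.com/subject/12345678/",
    source=MetaSourceInfo(id="provider-id", description="provider name", link="https://book.douban.com/"),
)
record_dict = dataclasses.asdict(book)       # nested MetaSourceInfo becomes a plain dict too
print(record_dict["title"], record_dict["source"]["link"])
```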
@@ -17,11 +17,50 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 # Copied from calibre-web so the provider can be tested standalone
-class Metadata():
+import abc
+import dataclasses
+import os
+from typing import Dict, List, Optional, Union
+
+
+@dataclasses.dataclass
+class MetaSourceInfo:
+    id: str
+    description: str
+    link: str
+
+
+@dataclasses.dataclass
+class MetaRecord:
+    id: Union[str, int]
+    title: str
+    authors: List[str]
+    url: str
+    source: MetaSourceInfo
+    cover: str = os.path.join("", 'generic_cover.jpg')
+    description: Optional[str] = ""
+    series: Optional[str] = None
+    series_index: Optional[Union[int, float]] = 0
+    identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
+    publisher: Optional[str] = None
+    publishedDate: Optional[str] = None
+    rating: Optional[int] = 0
+    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
+    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
+
+
+class Metadata:
     __name__ = "Generic"
     __id__ = "generic"
 
     def __init__(self):
         self.active = True
 
     def set_status(self, state):
         self.active = state
+
+    @abc.abstractmethod
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "cn"
+    ) -> Optional[List[MetaRecord]]:
+        pass
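This helper exists so `NewDouban.py` can be exercised without a calibre-web installation. One possible way a standalone test could wire that up is to register this local copy under the `cps.services.Metadata` name before importing the provider; this is a hypothetical bootstrap, not necessarily how the repository runs its tests, and it assumes this file is saved as `Metadata.py` next to `NewDouban.py` on `sys.path`:

```python
import sys
import types

import Metadata as local_metadata   # this test copy of calibre-web's Metadata module

# Register fake package entries so the provider's import line resolves locally.
sys.modules["cps"] = types.ModuleType("cps")
sys.modules["cps.services"] = types.ModuleType("cps.services")
sys.modules["cps.services.Metadata"] = local_metadata

import NewDouban   # "from cps.services.Metadata import ..." now finds the local copy
```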
@@ -1,42 +0,0 @@
-import requests
-
-from cps.services.Metadata import Metadata
-
-
-class Douban(Metadata):
-    __name__ = "Douban Books"
-    __id__ = "douban"
-    doubanUrl = "http://YOUR_NAS_IP:8085"
-    headers = {
-        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36'
-    }
-
-    def search(self, query, generic_cover=""):
-        if self.active:
-            val = list()
-            result = requests.get(self.doubanUrl + "/v2/book/search?q=" + query.replace(" ", "+"), headers=self.headers)
-            for r in result.json()['books']:
-                v = dict()
-                v['id'] = r['id']
-                v['title'] = r['title']
-                v['authors'] = r.get('authors', [])
-                v['description'] = r.get('summary', "")
-                v['publisher'] = r.get('publisher', "")
-                v['publishedDate'] = r.get('pubdate', "")
-                v['tags'] = [tag.get('name', '') for tag in r.get('tags', [])]
-                rating = r['rating'].get('average', '0')
-                if not rating:
-                    rating = '0'
-                v['rating'] = float(rating) / 2
-                if r.get('image'):
-                    v['cover'] = r.get('image')
-                else:
-                    v['cover'] = generic_cover
-                v['source'] = {
-                    "id": self.__id__,
-                    "description": self.__name__,
-                    "link": "https://book.douban.com/"
-                }
-                v['url'] = "https://book.douban.com/subject/" + r['id']
-                val.append(v)
-            return val
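One behaviour worth remembering from the removed provider: Douban reports averages on a 10-point scale, which the code halved (with a `'0'` fallback for unrated books) to fit the 5-point rating calibre-web expects. For example:

```python
# The same normalisation the removed code applied to the JSON field rating.average.
rating = "8.8"             # hypothetical Douban average
print(float(rating) / 2)   # 4.4 on the 5-point scale
```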
@@ -1,7 +0,0 @@
-from douban import Douban
-
-if __name__ == "__main__":
-    douban = Douban()
-    result = douban.search('人民的名义')
-    for book in result:
-        print(book.get('title'), book.get('url'))
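With the old `src/douban.py` provider and its driver script removed, an equivalent smoke test against `NewDouban` might look like the sketch below. It is hypothetical: it assumes something like the import bootstrap shown after the Metadata.py hunk, network access to douban.com, and it prints `MetaRecord` attributes instead of dict keys:

```python
# Hypothetical replacement for the removed driver script.
from NewDouban import NewDouban

if __name__ == "__main__":
    douban = NewDouban()
    result = douban.search('人民的名义')
    for book in result:
        print(book.title, book.url)
```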