提交一些新尝试
This commit is contained in:
parent
d6eedceaa3
commit
7d62e86d99
|
@ -1,2 +1,2 @@
|
|||
requests>=2.11.1,<2.25.0
|
||||
lxml>=3.8.0,<4.7.0
|
||||
requests>=2.11.1,<2.29.0
|
||||
lxml>=3.8.0,<5.0.0
|
|
@ -1,6 +1,8 @@
|
|||
import random
|
||||
import re
|
||||
import time
|
||||
import dataclasses
|
||||
import urllib
|
||||
|
||||
import requests
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
@ -10,15 +12,24 @@ from functools import lru_cache
|
|||
|
||||
from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
|
||||
|
||||
DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search" # 最新豆瓣屏蔽此url
|
||||
from cps.search_metadata import meta
|
||||
from flask import request, Response
|
||||
|
||||
# 是否自动代理封面地址
|
||||
DOUBAN_PROXY_COVER = True
|
||||
# 如果自动计算的服务器地址不正确,可以填写自己的calibre-web地址,参考:http://nas_ip:8083/
|
||||
DOUBAN_PROXY_COVER_HOST_URL = ''
|
||||
DOUBAN_PROXY_COVER_PATH = 'metadata/douban_cover?cover='
|
||||
DOUBAN_SEARCH_URL = "https://www.douban.com/search"
|
||||
DOUBAN_BASE = "https://book.douban.com/"
|
||||
DOUBAN_BOOK_CAT = "1001"
|
||||
DOUBAN_BOOK_CACHE_SIZE = 500 # 最大缓存数量
|
||||
DOUBAN_CONCURRENCY_SIZE = 5 # 并发查询数
|
||||
DOUBAN_BOOK_URL_PATTERN = re.compile(".*/subject/(\\d+)/?")
|
||||
DEFAULT_HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36',
|
||||
'Accept-Encoding': 'gzip, deflate'
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Referer': DOUBAN_BASE
|
||||
}
|
||||
PROVIDER_NAME = "New Douban Books"
|
||||
PROVIDER_ID = "new_douban"
|
||||
|
@ -37,6 +48,24 @@ class NewDouban(Metadata):
|
|||
return self.searcher.search_books(query)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class DoubanMetaRecord(MetaRecord):
    """MetaRecord whose ``cover`` is served through the local cover proxy.

    When ``DOUBAN_PROXY_COVER`` is truthy, reading ``cover`` rewrites the
    stored URL once to ``<host>/<DOUBAN_PROXY_COVER_PATH><quoted original>``
    so the image is fetched via this server's proxy route instead of
    directly from the remote host. All other attributes behave as on
    ``MetaRecord``.
    """

    def __getattribute__(self, item):
        """Return attribute ``item``, rewriting ``cover`` to the proxy URL.

        The host is ``DOUBAN_PROXY_COVER_HOST_URL`` if set, otherwise
        ``request.host_url``. The rewrite is skipped when the cover is empty
        or already contains the host, so repeated reads do not wrap the URL
        twice. Any failure while computing the proxy URL leaves the stored
        cover unchanged.
        """
        if item == 'cover' and DOUBAN_PROXY_COVER:
            cover_url = super().__getattribute__(item)
            if cover_url:
                # Import locally: a bare ``import urllib`` does not guarantee
                # that the ``urllib.parse`` submodule has been loaded.
                from urllib.parse import quote
                try:
                    host_url = DOUBAN_PROXY_COVER_HOST_URL
                    if not host_url and request.host_url:
                        host_url = request.host_url
                    if host_url and host_url not in cover_url:
                        # Tolerate a configured host without a trailing slash.
                        proxy_base = host_url if host_url.endswith('/') else host_url + '/'
                        self.cover = proxy_base + DOUBAN_PROXY_COVER_PATH + quote(cover_url)
                except Exception:
                    # Best effort: e.g. ``request`` is unusable here, so keep
                    # the original cover URL. KeyboardInterrupt and SystemExit
                    # are deliberately no longer swallowed.
                    pass
        return super().__getattribute__(item)
|
||||
|
||||
|
||||
class DoubanBookSearcher:
|
||||
|
||||
def __init__(self):
|
||||
|
@ -61,9 +90,8 @@ class DoubanBookSearcher:
|
|||
for link in alist:
|
||||
href = link.attrib['href']
|
||||
parsed = self.calc_url(href)
|
||||
if parsed:
|
||||
if len(book_urls) < DOUBAN_CONCURRENCY_SIZE:
|
||||
book_urls.append(parsed)
|
||||
if parsed and len(book_urls) < DOUBAN_CONCURRENCY_SIZE:
|
||||
book_urls.append(parsed)
|
||||
return book_urls
|
||||
|
||||
def search_books(self, query):
|
||||
|
@ -106,7 +134,7 @@ class DoubanBookHtmlParser:
|
|||
self.tag_pattern = re.compile("criteria = '(.+)'")
|
||||
|
||||
def parse_book(self, url, book_content):
|
||||
book = MetaRecord(
|
||||
book = DoubanMetaRecord(
|
||||
id="",
|
||||
title="",
|
||||
authors=[],
|
||||
|
@ -200,3 +228,10 @@ class DoubanBookHtmlParser:
|
|||
if not text:
|
||||
text = self.get_text(element.getnext(), default_str)
|
||||
return text if text else default_str
|
||||
|
||||
|
||||
@meta.route("/metadata/douban_cover", methods=["GET"])
def proxy_douban_cover():
    """Fetch a Douban cover image server-side and relay it to the client.

    Reads the image URL from the ``cover`` query parameter, downloads it
    with ``DEFAULT_HEADERS`` and returns the bytes with the upstream
    content type and status code.

    Returns:
        Response: the image on success; 400 if ``cover`` is missing,
        403 if the URL is not an http(s) Douban image/site host, 502 if the
        upstream request fails.
    """
    from urllib.parse import unquote, urlparse

    raw_cover = request.args.get('cover')
    if not raw_cover:
        return Response("missing 'cover' parameter", status=400)

    # NOTE(review): request.args is presumably already URL-decoded by the
    # framework; the extra unquote is kept to match the original behavior.
    cover_url = unquote(raw_cover)

    # The URL comes from the client, so refuse anything that is not a Douban
    # host; otherwise this route is an open server-side request proxy.
    parsed = urlparse(cover_url)
    host = (parsed.hostname or '').lower()
    allowed_host = (
        host in ('doubanio.com', 'douban.com')
        or host.endswith(('.doubanio.com', '.douban.com'))
    )
    if parsed.scheme not in ('http', 'https') or not allowed_host:
        return Response("cover host not allowed", status=403)

    try:
        # Bound connect/read time so a stalled upstream cannot hang a worker.
        res = requests.get(cover_url, headers=DEFAULT_HEADERS, timeout=(10, 30))
    except requests.RequestException:
        return Response("failed to fetch cover", status=502)

    return Response(
        res.content,
        status=res.status_code,
        mimetype=res.headers.get('Content-Type', 'application/octet-stream'),
    )
|
||||
|
|
|
@ -1,7 +1,13 @@
|
|||
import requests
|
||||
|
||||
from NewDouban import NewDouban
|
||||
|
||||
if __name__ == "__main__":
|
||||
douban = NewDouban()
|
||||
result = douban.search("知识考古学")
|
||||
for book in result:
|
||||
print(book)
|
||||
# douban = NewDouban()
|
||||
# result = douban.search("知识考古学")
|
||||
# for book in result:
|
||||
# print(book)
|
||||
|
||||
res = requests.get('http://127.0.0.1:8083/metadata/douban_cover?cover=https%3A//img1.doubanio.com/view/subject/l/public/s29195878.jpg',
|
||||
timeout=(10, 200), allow_redirects=False)
|
||||
print(res)
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
from mock.mocks import MockCls
|
||||
meta = MockCls()
|
|
@ -0,0 +1,3 @@
|
|||
from mock.mocks import MockCls
|
||||
Response = MockCls
|
||||
request = MockCls()
|
|
@ -0,0 +1,9 @@
|
|||
class MockCls:
    """Minimal stand-in for framework objects when running outside the app.

    Used as a fake blueprint (``route``), a fake request (``args``) and a
    fake response class, so the provider module can be imported and
    exercised without the real web framework.
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; expose an empty ``args`` mapping.

        Accepting arbitrary arguments lets the class stand in for a response
        type that is constructed with a body and keyword options.
        """
        self.args = {}

    @staticmethod
    def route(*args, **kwargs):
        """Return a no-op decorator mimicking a route registration call.

        The route arguments are ignored. The decorator returns the decorated
        function unchanged so it stays callable under its own name.
        """
        def inner(func):
            return func

        return inner
|
Loading…
Reference in New Issue