From d779ac82d7f79ba83d83a28faa3bfe861a96e61f Mon Sep 17 00:00:00 2001
From: Gary
Date: Mon, 3 Oct 2022 10:36:48 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=9C=80=E6=96=B0=E8=B1=86?=
 =?UTF-8?q?=E7=93=A3=E8=8E=B7=E5=8F=96403=E9=94=99=E8=AF=AF=E9=97=AE?=
 =?UTF-8?q?=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer note: line breaks and indentation were rebuilt from a
whitespace-collapsed copy of this patch. If context lines fail to match,
retry with `git apply --ignore-whitespace`. The post-image blob id on the
"index" line predates the review changes to load_book_urls_new.

 src/NewDouban.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/NewDouban.py b/src/NewDouban.py
index 963d3e5..3cca65d 100644
--- a/src/NewDouban.py
+++ b/src/NewDouban.py
@@ -9,7 +9,9 @@ from functools import lru_cache
 
 from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
 
-DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"
+DOUBAN_SEARCH_JSON_URL = "https://www.douban.com/j/search"  # Douban now blocks this URL
+DOUBAN_SEARCH_URL = "https://www.douban.com/search"
+DOUBAN_SEARCH_NEW_MODE = True
 DOUBAN_BOOK_CAT = "1001"
 DOUBAN_BOOK_CACHE_SIZE = 500  # 最大缓存数量
 DOUBAN_CONCURRENCY_SIZE = 5  # 并发查询数
@@ -65,8 +67,33 @@ class DoubanBookSearcher:
                     book_urls.append(parsed)
         return book_urls
 
+    def load_book_urls_new(self, query):
+        """Return book detail URLs scraped from Douban's HTML search page.
+
+        Replaces the JSON search endpoint, which Douban now blocks.
+        At most DOUBAN_CONCURRENCY_SIZE URLs accepted by calc_url are kept.
+        """
+        params = {"cat": DOUBAN_BOOK_CAT, "q": query}
+        # Bound the request so a stalled connection cannot hang the search.
+        res = requests.get(DOUBAN_SEARCH_URL, params, headers=DEFAULT_HEADERS, timeout=10)
+        book_urls = []
+        if res.status_code not in [200, 201]:
+            return book_urls
+        html = etree.HTML(res.content)
+        for link in html.xpath('//a[@class="nbg"]'):
+            if len(book_urls) >= DOUBAN_CONCURRENCY_SIZE:
+                break  # cap reached; no need to parse the remaining links
+            href = link.get('href')
+            parsed = self.calc_url(href) if href else None
+            if parsed:
+                book_urls.append(parsed)
+        return book_urls
+
     def search_books(self, query):
-        book_urls = self.load_book_urls(query)
+        if DOUBAN_SEARCH_NEW_MODE:
+            book_urls = self.load_book_urls_new(query)
+        else:
+            book_urls = self.load_book_urls(query)
         books = []
         futures = [self.thread_pool.submit(self.book_loader.load_book, book_url) for book_url in book_urls]
         for future in as_completed(futures):