修复部分书籍的丛书链接被误放入 authors 列表的问题

This commit is contained in:
Gary 2022-02-19 17:46:18 +08:00
parent fd49700ba9
commit 10ccb80a36
1 changed file with 6 additions and 2 deletions

View File

@ -120,9 +120,9 @@ class DoubanBookHtmlParser:
for element in elements:
text = self.get_text(element)
if text.startswith("作者"):
book['authors'].extend([self.get_text(author_element) for author_element in element.findall("..//a")])
book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
elif text.startswith("译者"):
book['authors'].extend([self.get_text(author_element) for author_element in element.findall("..//a")])
book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
elif text.startswith("出版社"):
book['publisher'] = self.get_tail(element)
elif text.startswith("副标题"):
@ -148,6 +148,10 @@ class DoubanBookHtmlParser:
def get_rating(self, rating_element):
    """Return the numeric rating found in *rating_element*, divided by two.

    The element's text is read through ``self.get_text`` with ``'0'`` as
    the fallback string, converted to ``float`` and halved.

    NOTE(review): presumably this maps a 10-point score onto a 5-point
    scale -- confirm against the caller.
    """
    raw_score = self.get_text(rating_element, '0')
    return float(raw_score) / 2
def author_filter(self, a_element):
    """Tell whether an anchor element's ``href`` contains ``'/author'``.

    Intended as a predicate for ``filter()`` over ``<a>`` elements, so that
    only links whose target contains ``'/author'`` are kept.

    Args:
        a_element: An element exposing a mapping-like ``attrib`` attribute.

    Returns:
        ``True`` if the ``href`` attribute contains ``'/author'``; ``False``
        otherwise, including when the element has no ``href`` at all.
    """
    # An <a> without an href (e.g. a named anchor) must not abort parsing
    # with a KeyError; treat it as "not an author link".
    href = a_element.attrib.get('href') or ''
    return '/author' in href
def get_text(self, element, default_str=''):
text = default_str
if len(element) and element[0].text: