修复部分书籍的丛书被误放入 authors 列表的问题
This commit is contained in:
parent
fd49700ba9
commit
10ccb80a36
|
@ -120,9 +120,9 @@ class DoubanBookHtmlParser:
|
||||||
for element in elements:
|
for element in elements:
|
||||||
text = self.get_text(element)
|
text = self.get_text(element)
|
||||||
if text.startswith("作者"):
|
if text.startswith("作者"):
|
||||||
book['authors'].extend([self.get_text(author_element) for author_element in element.findall("..//a")])
|
book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
|
||||||
elif text.startswith("译者"):
|
elif text.startswith("译者"):
|
||||||
book['authors'].extend([self.get_text(author_element) for author_element in element.findall("..//a")])
|
book['authors'].extend([self.get_text(author_element) for author_element in filter(self.author_filter, element.findall("..//a"))])
|
||||||
elif text.startswith("出版社"):
|
elif text.startswith("出版社"):
|
||||||
book['publisher'] = self.get_tail(element)
|
book['publisher'] = self.get_tail(element)
|
||||||
elif text.startswith("副标题"):
|
elif text.startswith("副标题"):
|
||||||
|
@ -148,6 +148,10 @@ class DoubanBookHtmlParser:
|
||||||
def get_rating(self, rating_element):
    """Return the rating read from ``rating_element`` as a halved float.

    The element's text is obtained through ``self.get_text`` with ``'0'``
    as the fallback string, converted to ``float`` and divided by two.
    """
    # NOTE(review): the division by 2 presumably maps a 10-point score onto
    # a 5-point scale -- confirm against the consumer of this value.
    raw_score = self.get_text(rating_element, '0')
    return float(raw_score) / 2
|
||||||
|
|
||||||
|
def author_filter(self, a_element):
    """Tell whether an ``<a>`` element links to an author page.

    Used as the predicate for ``filter()`` over the anchors collected for
    the author and translator rows, so that anchors pointing elsewhere
    (for example a book series) are not added to ``book['authors']``.

    Args:
        a_element: Element exposing an ``attrib`` mapping of its attributes.

    Returns:
        True when the element's ``href`` contains ``'/author'``; False
        otherwise, including when the element has no ``href`` at all.
    """
    # An anchor without href cannot be an author link; look it up
    # defensively so one malformed tag does not raise KeyError and abort
    # parsing of the whole page.
    a_href = a_element.attrib.get('href') or ''
    return '/author' in a_href
|
||||||
|
|
||||||
def get_text(self, element, default_str=''):
|
def get_text(self, element, default_str=''):
|
||||||
text = default_str
|
text = default_str
|
||||||
if len(element) and element[0].text:
|
if len(element) and element[0].text:
|
||||||
|
|
Loading…
Reference in New Issue