Web: Fix <title> extraction in presence of nested <svg>

This commit is contained in:
Valentin Lorentz 2024-11-16 10:50:15 +01:00
parent 8ec873015a
commit fd42132771

View File

@@ -1,7 +1,7 @@
### ###
# Copyright (c) 2005, Jeremiah Fincher # Copyright (c) 2005, Jeremiah Fincher
# Copyright (c) 2009, James McCoy # Copyright (c) 2009, James McCoy
# Copyright (c) 2010-2021, Valentin Lorentz # Copyright (c) 2010-2024, Valentin Lorentz
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@@ -59,24 +59,24 @@ class Title(utils.web.HtmlToText):
entitydefs['nbsp'] = ' ' entitydefs['nbsp'] = ' '
def __init__(self): def __init__(self):
self.inTitle = False self.inTitle = False
self.inSvg = False self.inSvg = 0 # counter instead of boolean because svg can be nested
utils.web.HtmlToText.__init__(self) utils.web.HtmlToText.__init__(self)
@property @property
def inHtmlTitle(self): def inHtmlTitle(self):
return self.inTitle and not self.inSvg return self.inTitle and self.inSvg == 0
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
if tag == 'title': if tag == 'title':
self.inTitle = True self.inTitle = True
elif tag == 'svg': elif tag == 'svg':
self.inSvg = True self.inSvg += 1
def handle_endtag(self, tag): def handle_endtag(self, tag):
if tag == 'title': if tag == 'title':
self.inTitle = False self.inTitle = False
elif tag == 'svg': elif tag == 'svg':
self.inSvg = False self.inSvg = max(0, self.inSvg - 1)
def append(self, data): def append(self, data):
if self.inHtmlTitle: if self.inHtmlTitle: