add config options useOembedRegistry and useOembedDiscovery

This commit is contained in:
lodriguez 2025-02-01 15:29:12 +01:00
parent eadac11ab6
commit 427845a358
2 changed files with 33 additions and 23 deletions

View File

@ -95,4 +95,12 @@ conf.registerGlobalValue(Web.fetch, 'timeout',
seconds the bot will wait for the site to respond, when using the 'fetch' seconds the bot will wait for the site to respond, when using the 'fetch'
command in this plugin. If 0, will use socket.defaulttimeout""")) command in this plugin. If 0, will use socket.defaulttimeout"""))
# Switch for consulting the oembed.com providers registry when looking up an
# oEmbed endpoint for a URL; registered with False as its initial value, so
# the registry lookup is opt-in.
conf.registerGlobalValue(Web, 'useOembedRegistry',
registry.Boolean(False, _("""Determines whether the bot will use the
oembed.com providers registry.""")))
# Switch for discovering the oEmbed endpoint from the page's HTML; also
# registered with False as its initial value, so fetching the page for
# discovery is opt-in.
conf.registerGlobalValue(Web, 'useOembedDiscovery',
registry.Boolean(False, _("""Determines whether the bot will use HTML
discovery to find oEmbed endpoints.""")))
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -287,29 +287,31 @@ class Web(callbacks.PluginRegexp):
First tries the providers registry if enabled, then falls back to First tries the providers registry if enabled, then falls back to
HTML discovery if needed and enabled. HTML discovery if needed and enabled.
""" """
providers = self._loadOEmbedProviders() if self.registryValue('useOembedRegistry'):
for provider in providers: providers = self._loadOEmbedProviders()
for pattern in provider.get('endpoints', []): for provider in providers:
schemes = pattern.get('schemes', []) for pattern in provider.get('endpoints', []):
endpoint = pattern.get('url', '') schemes = pattern.get('schemes', [])
for scheme in schemes: endpoint = pattern.get('url', '')
regex = re.escape(scheme).replace(r'\*', '.*') for scheme in schemes:
if re.match(regex, url): regex = re.escape(scheme).replace(r'\*', '.*')
return endpoint if re.match(regex, url):
try: return endpoint
timeout = self.registryValue('timeout') if self.registryValue('useOembedDiscovery'):
response = utils.web.getUrl(url, timeout=timeout) try:
text = response.decode('utf8', errors='replace') timeout = self.registryValue('timeout')
match = re.search( response = utils.web.getUrl(url, timeout=timeout)
r'<link[^>]+?type="application/json\+oembed"[^>]+?href="([^"]+)"', text = response.decode('utf8', errors='replace')
text, match = re.search(
re.IGNORECASE) r'<link[^>]+?type="application/json\+oembed"[^>]+?href="([^"]+)"',
if match: text,
endpoint = match.group(1) re.IGNORECASE)
endpoint = endpoint.split('?')[0] if match:
return endpoint endpoint = match.group(1)
except Exception as e: endpoint = endpoint.split('?')[0]
self.log.debug(f"Failed to discover oEmbed endpoint in HTML: {e}") return endpoint
except Exception as e:
self.log.debug(f"Failed to discover oEmbed endpoint in HTML: {e}")
return None return None
def getOEmbedTitle(self, url): def getOEmbedTitle(self, url):