60 lines
2.3 KiB
Python
60 lines
2.3 KiB
Python
# -*- python -*-
|
|
import asyncio
|
|
import bs4
|
|
import functools
|
|
import lxml
|
|
import re
|
|
import requests
|
|
|
|
class Plugin:
    """Chat plugin that replies with the titles of pages linked in a message."""

    # Matches http(s) URLs up to the first double quote, whitespace or '>'.
    _html_regexp = re.compile(r"(https?://[^\"\s>]+)")

    # Only responses whose Content-Type starts with one of these are parsed.
    _mime_types = ("application/xhtml+xml", "application/xml",
                   "text/html", "text/xml")

    # Seconds requests may wait on the server before the fetch is abandoned.
    _request_timeout = 10

    # Number of bytes pulled from the response between two parse attempts.
    _chunk_size = 1024

    # Stop looking for a title once this many decoded characters were scanned.
    _max_scan_chars = 1024 * 1024

    async def chat_message(self, body, nick, from_id, is_admin):
        """Look for links in a chat message and reply with their page titles.

        Args:
            body: Text of the chat message.
            nick: Unused here; kept for the plugin call signature.
            from_id: Unused here; kept for the plugin call signature.
            is_admin: Unused here; kept for the plugin call signature.

        Returns:
            A dict. ``"handled"`` is ``True`` when ``body`` contains at least
            one URL and ``False`` otherwise. When handled, ``"reply"`` holds
            one ``"Link: <title>"`` line per page whose title was found (an
            empty string if none was). ``"error"`` is present only when a
            fetch failed and describes the most recent failure.
        """
        urls = self._html_regexp.findall(body)
        if not urls:
            return {"handled": False}

        loop = asyncio.get_event_loop()
        result = {"handled": True}
        lines = []
        for url in urls:
            try:
                # The download and parsing are blocking, so the whole job is
                # done on the default executor instead of the event loop.
                title = await loop.run_in_executor(None,
                                                   self._fetch_title, url)
            except Exception as e:
                # Best effort: a failing link must not prevent the remaining
                # links from being processed; the failure is reported back.
                result["error"] = "Title fetch: %s" % str(e)
            else:
                if title:
                    lines.append("Link: %s" % title)
        result["reply"] = "\n".join(lines)
        return result

    def _fetch_title(self, url):
        """Download ``url`` and return its ``<title>`` text (blocking).

        Returns an empty string when the response is not an (X)HTML/XML
        document or when no title text is found. Network and parser errors
        propagate to the caller.
        """
        response = requests.get(url, stream=True,
                                timeout=self._request_timeout)
        try:
            # A response without a Content-Type header is treated as non-HTML.
            content_type = response.headers.get("content-type", "")
            if not content_type.startswith(self._mime_types):
                return ""

            # requests assumes ISO-8859-1 for text/* responses that declare
            # no charset; drop that guess so the encoding is detected from
            # the content instead.
            if content_type.startswith("text/") and \
               "charset=" not in content_type:
                response.encoding = None

            if not response.encoding:
                # NOTE: apparent_encoding reads the complete body in order to
                # sniff it. Fall back to UTF-8 if detection yields nothing,
                # otherwise iter_content() would hand back bytes.
                response.encoding = response.apparent_encoding or "utf-8"

            contents = ""
            title = ""
            for chunk in response.iter_content(chunk_size=self._chunk_size,
                                               decode_unicode=True):
                contents += chunk
                soup = bs4.BeautifulSoup(contents, "lxml")
                if soup.title:
                    # The title may still be cut off by the chunk boundary;
                    # it is accepted once two consecutive parses agree, which
                    # also lets the download stop early.
                    if soup.title.string == title:
                        break
                    title = soup.title.string
                if len(contents) >= self._max_scan_chars:
                    break

            # str() detaches the text from the parse tree it belongs to.
            return str(title) if title else ""
        finally:
            # Always release the streamed connection, whatever happened above.
            response.close()