# -*- python -*-

import asyncio
import bs4
import functools
import lxml
import re
import requests


class Plugin:
    """Chat plugin that answers messages containing links with page titles."""

    # Matches http/https URLs up to the next quote, whitespace or ">".
    _html_regexp = re.compile(r"(https?://[^\"\s>]+)")

    # Content types (prefix match) that are parsed for a <title>.
    _mime_types = ("application/xhtml+xml", "application/xml",
                   "text/html", "text/xml")

    # Seconds passed to requests as the timeout, so an unresponsive server
    # cannot keep an executor thread busy forever.
    _request_timeout = 10

    def _fetch_title(self, url):
        """Download *url* and return its HTML title.

        This method is blocking and is meant to be run in an executor.

        Returns the title string, or a falsy value ("" or None) when the
        response is not one of the supported content types or no usable
        title was found.  Exceptions from requests or the parser propagate
        to the caller.
        """
        response = requests.get(url, stream=True,
                                timeout=self._request_timeout)
        try:
            # A missing header is treated like an unsupported type; MIME
            # types are case-insensitive, so compare in lower case.
            content_type = response.headers.get("content-type", "").lower()
            if not content_type.startswith(self._mime_types):
                return ""

            # Handle a case when no charset is defined for text/html:
            # discard the default encoding so it gets detected instead.
            if content_type.startswith("text/") and \
               "charset=" not in content_type:
                response.encoding = None
            if not response.encoding:
                response.encoding = response.apparent_encoding

            # Parse the document incrementally: after every chunk the text
            # received so far is re-parsed.  Once two consecutive parses
            # give the same title it is considered complete and the rest of
            # the page is not read.
            contents = title = ""
            for chunk in response.iter_content(chunk_size=128,
                                               decode_unicode=True):
                contents += chunk
                soup = bs4.BeautifulSoup(contents, "lxml")
                if soup and soup.title:
                    if soup.title.string == title:
                        break
                    title = soup.title.string
            return title
        finally:
            # Always release the streamed connection, including on the
            # early-return and exception paths.
            response.close()

    @asyncio.coroutine
    def chat_message(self, body, nick, from_id, is_admin):
        """Reply with the titles of all web pages linked in a chat message.

        Parameters:
            body: text of the message; it is scanned for http(s) URLs.
            nick, from_id, is_admin: accepted but not used by this method.

        Returns a dict:
            "handled": True if at least one URL was found, else False.
            "reply":   (only when handled) one "Link: <title>" line per
                       page with a title, joined by newlines; may be "".
            "error":   (only on failure) "Title fetch: <exception text>"
                       for the last URL that failed.
        """
        result = {}
        urls = self._html_regexp.findall(body)
        if not urls:
            result["handled"] = False
            return result

        result["handled"] = True
        loop = asyncio.get_event_loop()
        links = []
        for url in urls:
            try:
                # The whole download/parse is blocking, so it runs in the
                # default executor rather than on the event loop thread.
                title = yield from \
                    loop.run_in_executor(None, self._fetch_title, url)
            except Exception as e:
                # Best effort: report the failure and go on with the
                # remaining URLs.
                result["error"] = "Title fetch: %s" % str(e)
            else:
                if title:
                    links.append("Link: %s" % title)
        result["reply"] = "\n".join(links)
        return result