mrmp/hypertext.py

28 lines
620 B
Python
Raw Normal View History

2021-10-09 12:02:20 +00:00
import html.parser
2021-10-10 00:40:21 +00:00
import requests
2021-10-10 01:00:43 +00:00
import msys
2021-10-09 12:02:20 +00:00
class Parser(html.parser.HTMLParser):
    """HTML parser that collects the ``href`` of every ``<a>`` start tag.

    Attributes:
        links: list of ``href`` values, in the order the anchors were
            encountered while feeding markup to the parser.
    """

    def __init__(self):
        # Kept ahead of the base initialiser (same order as before) so the
        # attribute exists no matter what the base class does during setup.
        self.links = []
        super().__init__()

    def handle_starttag(self, tag, attributes):
        """Record the ``href`` attribute(s) of an ``<a>`` start tag.

        Args:
            tag: lower-cased tag name supplied by ``HTMLParser``.
            attributes: list of ``(name, value)`` pairs for the tag.
        """
        if tag != 'a':
            return
        # HTMLParser reports a valueless attribute (``<a href>``) with a
        # value of None; such an entry is not a link, so leave it out
        # instead of putting None into ``links``.
        self.links.extend(
            value for name, value in attributes
            if name == 'href' and value is not None)
2021-10-10 01:00:43 +00:00
class HyperText:
    """The anchor links of one HTML document fetched over HTTP.

    Attributes:
        location: the URL handed to ``requests.get``.
        links: ``href`` values gathered by ``Parser`` during the most
            recent ``load()``.
        timeout: seconds ``requests`` may wait on the server before giving
            up (see ``load``).
    """

    # Class-level default so ``load()`` keeps working on instances whose
    # ``__init__`` was overridden or bypassed.
    timeout = 30.0

    def __init__(self, location, timeout=None):
        """Remember ``location`` and immediately fetch and parse it.

        Args:
            location: URL of the document to load.
            timeout: optional override, in seconds, of the class-level
                default request timeout.
        """
        self.location = location
        if timeout is not None:
            self.timeout = timeout
        self.load()

    def load(self):
        """Download ``self.location`` and refresh ``self.links``.

        Raises:
            requests.exceptions.Timeout: if the server stays silent for
                longer than ``self.timeout`` seconds.  Without a timeout
                ``requests`` would wait indefinitely.
        """
        response = requests.get(self.location, timeout=self.timeout)
        # The body is always decoded with msys.CHARSET.
        # NOTE(review): any charset declared by the response is ignored
        # here - confirm that is intended.
        hypertext = response.content.decode(msys.CHARSET)
        parser = Parser()
        parser.feed(hypertext)
        self.links = parser.links