2021-10-09 12:02:20 +00:00
|
|
|
import html.parser
|
2021-10-10 00:40:21 +00:00
|
|
|
import requests
|
|
|
|
|
2021-10-10 01:00:43 +00:00
|
|
|
import msys
|
2021-10-09 12:02:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Parser(html.parser.HTMLParser):
    """HTML parser that collects the ``href`` values of ``<a>`` start tags.

    After feeding markup with ``feed()``, the collected values are
    available, in document order, in the ``links`` list.
    """

    def __init__(self):
        super().__init__()
        # href values gathered so far, in the order the tags were seen.
        self.links = []

    def handle_starttag(self, tag, attributes):
        """Record every ``href`` attribute value of an ``<a>`` tag.

        ``attributes`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.  Tags other than ``<a>`` are ignored.
        """
        if tag != 'a':
            return
        # A valueless attribute (``<a href>``) is reported with value None;
        # it is not a usable link and would break string operations such as
        # ``link.endswith(...)`` downstream, so it is skipped.
        self.links.extend(
            value for name, value in attributes
            if name == 'href' and value is not None)
|
|
|
|
|
|
|
|
|
2021-10-10 01:00:43 +00:00
|
|
|
class HyperText:
    """Links extracted from the HTML document found at ``location``.

    Constructing an instance immediately downloads and parses the
    document (see ``load``).

    Attributes set by ``load``:
        links: every ``href`` value collected by ``Parser``.
        archives: the links that end with ``msys.ARCHIVE``.
        archive: the last entry of ``archives`` in sorted order
            (presumably the most recent archive -- confirm against the
            naming scheme of the listing).
    """

    def __init__(self, location):
        self.location = location
        self.load()

    def load(self):
        """Fetch ``self.location`` and (re)compute the link attributes.

        Raises:
            IndexError: if the document contains no link ending with
                ``msys.ARCHIVE``.
        """
        response = requests.get(self.location)
        hypertext = response.content.decode(msys.CHARSET)

        parser = Parser()
        parser.feed(hypertext)
        self.links = parser.links

        # A valueless ``<a href>`` yields a None link; guard against it so
        # the suffix test cannot fail with AttributeError.
        self.archives = [link for link in self.links
                         if link is not None and link.endswith(msys.ARCHIVE)]

        if not self.archives:
            # Same exception type that indexing the empty list would raise,
            # but with a message that names the offending location.
            raise IndexError(
                'no archive link found at {!r}'.format(self.location))
        self.archive = sorted(self.archives)[-1]
|