package
This commit is contained in:
parent
3eccd76e8c
commit
2eb71e6412
14 changed files with 0 additions and 0 deletions
27
mrmp/hypertext.py
Normal file
27
mrmp/hypertext.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
import html.parser
|
||||
import requests
|
||||
|
||||
# Codec used to decode fetched response bodies; 'u8' is a Python alias for UTF-8.
CHARSET = 'u8'
|
||||
|
||||
|
||||
class Parser(html.parser.HTMLParser):
    """HTML parser that records the ``href`` of every ``<a>`` start tag.

    After ``feed()`` has been called, ``links`` holds the collected
    ``href`` values in document order.
    """

    def __init__(self):
        super().__init__()
        # Per-instance list; appended to as anchors are encountered.
        self.links = []

    def handle_starttag(self, tag, attributes):
        """Collect ``href`` values from anchor tags.

        Args:
            tag: Tag name as reported by ``HTMLParser``.
            attributes: List of ``(name, value)`` pairs for the tag.
        """
        if tag != 'a':
            return
        # HTMLParser reports a value-less attribute (``<a href>``) with a
        # value of None; skip those so ``links`` only ever holds strings.
        self.links.extend(
            value for name, value in attributes
            if name == 'href' and value is not None)
|
||||
|
||||
|
||||
class HyperText:
    """A remote HTML document together with the links found in it.

    Constructing an instance fetches and parses the document immediately
    (see ``load``).

    Attributes:
        location: URL handed to ``requests.get``.
        timeout: Value forwarded to ``requests.get`` as ``timeout``.
        links: ``href`` values collected by ``Parser`` on the last load.
    """

    def __init__(self, location, timeout=30):
        """Store the location and load it right away.

        Args:
            location: URL of the document to fetch.
            timeout: Timeout forwarded to ``requests.get``. Pass ``None``
                to wait indefinitely (the behaviour before this
                parameter existed).
        """
        self.location = location
        self.timeout = timeout
        self.load()

    def load(self):
        """Fetch ``self.location``, parse it and refresh ``self.links``.

        Raises:
            requests.exceptions.RequestException: If the request fails,
                including when the timeout expires.
            UnicodeDecodeError: If the response body cannot be decoded
                with ``CHARSET``.
        """
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(self.location, timeout=self.timeout)
        hypertext = response.content.decode(CHARSET)
        parser = Parser()
        parser.feed(hypertext)
        self.links = parser.links
|
Loading…
Add table
Add a link
Reference in a new issue