html parsing fixes, i hate you writworthy

master
JoYo 2022-06-08 20:08:53 -04:00
parent 4841d68816
commit d8ef990292
1 changed files with 10 additions and 1 deletions

View File

@ -7,6 +7,7 @@ from shutil import rmtree, copytree, copyfileobj
from tempfile import TemporaryDirectory, NamedTemporaryFile from tempfile import TemporaryDirectory, NamedTemporaryFile
from zipfile import ZipFile from zipfile import ZipFile
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.parse import quote
import logging import logging
import re import re
@ -111,7 +112,11 @@ def esoui_parse(url: str):
request = Request(url, headers=HEADERS) request = Request(url, headers=HEADERS)
response = urlopen(request) response = urlopen(request)
response_text = response.read().decode("unicode_escape") response_data = response.read()
# writworthy has some garbage characters on its page
response_text = response_data[:110000].decode("unicode_escape")
version_line = esoui_version_html.search(response_text).group(0) version_line = esoui_version_html.search(response_text).group(0)
_version = esoui_version_split.split(version_line)[1] _version = esoui_version_split.split(version_line)[1]
_version = version.parse(_version) _version = version.parse(_version)
@ -122,6 +127,10 @@ def esoui_parse(url: str):
response = urlopen(request) response = urlopen(request)
response_text = response.read().decode("unicode_escape") response_text = response.read().decode("unicode_escape")
esoui_dowload_uri = esoui_download.search(response_text).group(0) esoui_dowload_uri = esoui_download.search(response_text).group(0)
esoui_dowload_uri = esoui_dowload_uri.split("?")[0]
esoui_dowload_uri = esoui_dowload_uri.split("https://")[1]
esoui_dowload_uri = quote(esoui_dowload_uri)
esoui_dowload_uri = f"https://{esoui_dowload_uri}"
head_request = Request(esoui_dowload_uri, method="HEAD", headers=HEADERS) head_request = Request(esoui_dowload_uri, method="HEAD", headers=HEADERS)
response = urlopen(head_request) response = urlopen(head_request)
response_text = response.read().decode("unicode_escape") response_text = response.read().decode("unicode_escape")