Browse Source

Cleanups

main
fluffy 3 months ago
parent
commit
3a46e295a8
1 changed file with 23 additions and 9 deletions
  1. +23
    -9
      zenius.py

+ 23
- 9
zenius.py View File

@@ -7,7 +7,6 @@ import queue
import urllib
import urllib.parse
import zipfile
from urllib.request import urlretrieve

import requests
import requests.exceptions
@@ -18,7 +17,8 @@ from frozendict import frozendict
def parse_args(*args):
parser = argparse.ArgumentParser(description="Mirror simfiles from ZIV")

parser.add_argument('categories', type=str, nargs='*', help='ZIV category pages to mirror',
parser.add_argument('categories', type=str, nargs='*',
help='ZIV category pages to mirror',
default=['https://zenius-i-vanisher.com/v5.2/simfiles.php?category=latest20official'])

parser.add_argument('--songdir', type=str, help="Directory to keep songs in", default="songs")
@@ -42,6 +42,7 @@ def parse_args(*args):
@functools.lru_cache()
def retrieve(url, filename, save_headers=None, extract=None, **kwargs):
print(f'Downloading {url} -> {filename}')
remove = False
try:
req = requests.get(url, **kwargs, stream=True)
if req.status_code == 200:
@@ -58,7 +59,8 @@ def retrieve(url, filename, save_headers=None, extract=None, **kwargs):
zip.extractall(extract)

if save_headers:
pickle.dump(req.headers, open(save_headers, 'wb'))
with open(save_headers, 'wb') as data:
pickle.dump(req.headers, data)
elif req.status_code == 304:
print("Not modified")
else:
@@ -67,12 +69,19 @@ def retrieve(url, filename, save_headers=None, extract=None, **kwargs):
print(f'Error downloading: {e.msg}')
except zipfile.BadZipFile:
print(f'Not a zip file: {filename}')
remove = True
except KeyboardInterrupt as e:
print(f'Download aborting...')
if os.path.isfile(filename):
print(f'Removing partial file {filename}')
os.unlink(filename)
print('Download aborting...')
remove = True
raise e
finally:
if remove:
if os.path.isfile(filename):
print(f'Removing {filename}')
os.remove(filename)
if save_headers and os.path.isfile(save_headers):
print(f'Removing {save_headers}')
os.remove(save_headers)

return req.headers

@@ -86,7 +95,8 @@ def get_page(cat_url):
def load_prev_headers(filename, header_file):
req_headers = {}
if os.path.isfile(header_file) and os.path.isfile(filename):
prev_headers = pickle.load(open(header_file, 'rb'))
with open(header_file, 'rb') as data:
prev_headers = pickle.load(data)
if 'etag' in prev_headers:
req_headers['If-None-Match'] = prev_headers['etag']
if 'last-modified' in prev_headers:
@@ -160,7 +170,7 @@ def mirror(cat_url, args):
return group_urls.values()


if __name__ == "__main__":
def main():
args = parse_args()

os.makedirs(args.songdir, exist_ok=True)
@@ -182,3 +192,7 @@ if __name__ == "__main__":
print(f'Scheduling discovered category {url}')
pending.put(url)



if __name__ == "__main__":
main()

Loading…
Cancel
Save