Bläddra i källkod

Recursively download source categories

main
fluffy 6 månader sedan
förälder
incheckning
07a057437f
5 ändrade filer med 38 tillägg och 9 borttagningar
  1. +12
    -1
      poetry.lock
  2. +1
    -0
      pyproject.toml
  3. +1
    -1
      update.cmd
  4. +1
    -1
      update.sh
  5. +23
    -6
      zenius.py

+ 12
- 1
poetry.lock Visa fil

@@ -61,6 +61,14 @@ category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

[[package]]
name = "frozendict"
version = "1.2"
description = "An immutable dictionary"
category = "main"
optional = false
python-versions = "*"

[[package]]
name = "idna"
version = "2.10"
@@ -182,7 +190,7 @@ python-versions = "*"
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "0f6584e05b78d4ffccf43e984311168b3986f441ec25f9d88da120325d1ab54d"
content-hash = "fc8f9dec7eaeff5965a17401468941b04fb4a507450907c935c311ed075432a5"

[metadata.files]
astroid = [
@@ -210,6 +218,9 @@ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
frozendict = [
{file = "frozendict-1.2.tar.gz", hash = "sha256:774179f22db2ef8a106e9c38d4d1f8503864603db08de2e33be5b778230f6e45"},
]
idna = [
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},


+ 1
- 0
pyproject.toml Visa fil

@@ -8,6 +8,7 @@ authors = ["fluffy <fluffy@beesbuzz.biz>"]
python = "^3.8"
beautifulsoup4 = "^4.9.3"
requests = "^2.25.1"
frozendict = "^1.2"

[tool.poetry.dev-dependencies]
autopep8 = "^1.5.5"


+ 1
- 1
update.cmd Visa fil

@@ -1,2 +1,2 @@
python -m poetry install
python -m poetry run python zenius.py --songdir="..\StepMania 5.3 Outfox\Songs" %*
python -m poetry run python zenius.py -r --songdir="..\StepMania 5.3 Outfox\Songs" %*

+ 1
- 1
update.sh Visa fil

@@ -2,5 +2,5 @@

cd "$(dirname $0)"
poetry install
poetry run python3 zenius.py "$@"
poetry run python3 zenius.py -r "$@"


+ 23
- 6
zenius.py Visa fil

@@ -3,6 +3,7 @@ import functools
import os
import os.path
import pickle
import queue
import urllib
import urllib.parse
import zipfile
@@ -11,6 +12,7 @@ from urllib.request import urlretrieve
import requests
import requests.exceptions
from bs4 import BeautifulSoup
from frozendict import frozendict


def parse_args(*args):
@@ -22,9 +24,13 @@ def parse_args(*args):
parser.add_argument('--songdir', type=str, help="Directory to keep songs in", default="songs")
parser.add_argument('--zipdir', type=str, help="Directory to keep downloaded zip files in", default="zips")

parser.add_argument('--recurse', '-r',
help='Recursively fetch the main categories for each song',
action='store_true')

feature = parser.add_mutually_exclusive_group(required=False)
feature.add_argument('--dry-run', '-n',
help="Only perform a dry run; don't send any pings",
help="Only perform a dry run; don't download any files",
dest='dry_run', action='store_true')
feature.add_argument('--no-dry-run',
help="Send pings normally",
@@ -33,16 +39,18 @@ def parse_args(*args):

return parser.parse_args(*args)

@functools.lru_cache()
def retrieve(url, filename, save_headers=None, extract=None, **kwargs):
print(f'Downloading {url} -> {filename}')
try:
req = requests.get(url, **kwargs, stream=True)
if req.status_code == 200:
with open(filename, 'wb') as output:
with open(f'{filename}.part', 'wb') as output:
for chunk in req.iter_content(1024):
output.write(chunk)

os.replace(f'{filename}.part', filename)

if extract:
with zipfile.ZipFile(filename, 'r') as zip:
print(f'Extracting into {extract}')
@@ -135,7 +143,7 @@ def mirror(cat_url, args):
req_headers = load_prev_headers(filename, headers)

retrieve(url, filename, extract=os.path.join(args.songdir, groupname),
headers=req_headers, save_headers=headers)
headers=frozendict(req_headers), save_headers=headers)

for groupname, group_url in group_urls.items():
page = get_page(group_url)
@@ -149,6 +157,8 @@ def mirror(cat_url, args):
retrieve(url, filename, extract=False,
headers=req_headers, save_headers=headers)

return group_urls.values()


if __name__ == "__main__":
args = parse_args()
@@ -156,5 +166,12 @@ if __name__ == "__main__":
os.makedirs(args.songdir, exist_ok=True)
os.makedirs(args.zipdir, exist_ok=True)

for url in args.categories:
mirror(url, args)
seen_cats = set()
categories = args.categories
while categories:
for url in {cat for cat in categories if cat not in seen_cats}:
found = mirror(url, args)
seen_cats.add(url)
if args.recurse:
print(f'Recursively fetching categories: {list(found)}')
categories = found

Laddar…
Avbryt
Spara