Leech/update scripts for zenius-i-vanisher.com
import argparse
import os
import pickle
import urllib.parse
import zipfile

import requests
from bs4 import BeautifulSoup
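
# Typical invocations (the script filename is an assumption; it is not shown
# in this listing):
#   python mirror.py                        # mirror the 20 newest official simfiles
#   python mirror.py -n CATEGORY_URL ...    # dry run against specific category pages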

def parse_args(*args):
    parser = argparse.ArgumentParser(description="Mirror simfiles from ZIV")
    parser.add_argument('categories', type=str, nargs='*',
                        help='ZIV category pages to mirror',
                        default=['https://zenius-i-vanisher.com/v5.2/simfiles.php?category=latest20official'])
    feature = parser.add_mutually_exclusive_group(required=False)
    feature.add_argument('--dry-run', '-n',
                         help="Only perform a dry run; don't download anything",
                         dest='dry_run', action='store_true')
    feature.add_argument('--no-dry-run',
                         help="Download normally",
                         dest='dry_run', action='store_false')
    feature.set_defaults(dry_run=False)
    return parser.parse_args(*args)
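
# For example, parse_args(['--dry-run']) returns
# Namespace(categories=['https://zenius-i-vanisher.com/v5.2/simfiles.php?category=latest20official'],
#           dry_run=True).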

def retrieve(url, filename, groupname, save_headers=None, **kwargs):
    """Download one simfile zip and extract it into songs/<groupname>."""
    print(f'Downloading {url} -> {filename}')
    try:
        os.makedirs('zips', exist_ok=True)
        req = requests.get(url, **kwargs, stream=True)
        if req.status_code == 200:
            with open(filename, 'wb') as output:
                for chunk in req.iter_content(1024):
                    output.write(chunk)
            with zipfile.ZipFile(filename, 'r') as archive:
                songdir = f'songs/{groupname}'
                print(f'Extracting into {songdir}')
                os.makedirs(songdir, exist_ok=True)
                archive.extractall(songdir)
            if save_headers:
                # Cache the response headers for conditional requests on later runs
                with open(save_headers, 'wb') as cache:
                    pickle.dump(req.headers, cache)
        elif req.status_code == 304:
            print("Not modified")
        else:
            print(f"Error: {req.status_code} {req.text}")
    except requests.RequestException as e:
        print(f'Error downloading: {e}')
        return None
    except zipfile.BadZipFile:
        print(f'Not a zip file: {filename}')
    except KeyboardInterrupt:
        print('Download aborted...')
        if os.path.isfile(filename):
            print(f'Removing partial file {filename}')
            os.unlink(filename)
        raise
    return req.headers
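
# On a repeat run the cached headers come back in as conditional-request
# headers, e.g. (illustrative values):
#   retrieve(url, 'zips/12345.zip', 'Some Pack',
#            headers={'If-None-Match': '"abc123"'},
#            save_headers='zips/12345.headers')
# lets the server answer 304 Not Modified instead of resending the zip.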

def mirror(cat_url, args):
    request = requests.get(cat_url)
    page = BeautifulSoup(request.text, features="html.parser")
    if 'viewsimfilecategory.php' in cat_url:
        # A category page names the group once in its header
        simgroup = page.find('div', {'class': 'headertop'}).h1
    else:
        simgroup = None
    for row in page.find_all('tr'):
        simfile = row.find("a", href=lambda href: href and "viewsimfile.php" in href)
        simgroup = simgroup or row.find("a", href=lambda href: href and "viewsimfilecategory.php" in href)
        if not (simfile and simgroup):
            continue
        # Collapse whitespace and strip '/' so the names are safe as directory names
        songname = ' '.join(simfile.get_text().replace('/', '-').split())
        groupname = ' '.join(simgroup.get_text().replace('/', '-').split())
        print(f"collection: '{groupname}' simfile: '{songname}'")
        simlink = simfile['href']
        try:
            sim_id = urllib.parse.parse_qs(urllib.parse.urlparse(simlink).query)['simfileid'][0]
        except KeyError:
            print(f"WARNING: no simfileid found on URL {simlink}")
            continue
        url = f'https://zenius-i-vanisher.com/v5.2/download.php?type=ddrsimfile&simfileid={sim_id}'
        if args.dry_run:
            print(f"Dry run requested, not downloading {url}")
            continue
        filename = f'zips/{sim_id}.zip'
        headers = f'zips/{sim_id}.headers'
        req_headers = {}
        if os.path.isfile(headers):
            with open(headers, 'rb') as cache:
                prev_headers = pickle.load(cache)
            # Only send conditional headers if the zip is still on disk to fall back on
            if os.path.isfile(filename):
                if 'etag' in prev_headers:
                    req_headers['If-None-Match'] = prev_headers['etag']
                if 'last-modified' in prev_headers:
                    req_headers['If-Modified-Since'] = prev_headers['last-modified']
        retrieve(url, filename, groupname, headers=req_headers, save_headers=headers)
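
# The resulting cache layout is zips/<simfileid>.zip plus a pickled
# zips/<simfileid>.headers file that the conditional-request logic above reads back.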

if __name__ == "__main__":
    args = parse_args()
    for url in args.categories:
        mirror(url, args)
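
To drive this from other code rather than the command line, a minimal sketch, assuming the script is saved as ziv_mirror.py (the module name is an assumption; the listing does not show the filename):

    import ziv_mirror

    # Dry run: print what would be downloaded without fetching any zips
    args = ziv_mirror.parse_args(['--dry-run'])
    for url in args.categories:
        ziv_mirror.mirror(url, args)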