sf1000/granule.py

130 lines
3.9 KiB
Python

import scipy.io.wavfile
import scipy.signal
import numpy
import argparse
import logging
# Logging levels indexed by how many times -v was given on the command
# line (0 = warnings only, 1 = info, 2 or more = debug).
LOG_LEVELS = [
    logging.WARNING,
    logging.INFO,
    logging.DEBUG,
]

# Module-wide logger shared by every function in this file.
LOGGER = logging.getLogger(__name__)
def parse_args(*args):
    """ get the CLI args

    Invalid combinations are rejected here, through argparse's own usage
    error (exit status 2), instead of surfacing later as an arithmetic
    error: the target length is mandatory and must be positive, and at
    least one input file or a playlist has to be supplied.

    :param args: Optional arguments forwarded to
        ``ArgumentParser.parse_args`` (e.g. a list of strings); when
        omitted, ``sys.argv`` is parsed
    :returns: The parsed ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(
        prog="granule",
        description="Extract granules from a wav file")

    parser.add_argument('input', nargs='*', help="Input filename")
    parser.add_argument('-p', '--playlist', help="Input playlist")
    parser.add_argument('-o', '--output', help="Output filename")
    # The per-file granule size is derived from this value, so it cannot
    # be left unset.
    parser.add_argument('-t', '--time', type=int, required=True,
                        help="Target output length (seconds)")
    parser.add_argument("-v", "--verbosity", action="count",
                        help="increase output verbosity",
                        default=0)

    parsed = parser.parse_args(*args)

    if parsed.time <= 0:
        parser.error("--time must be a positive number of seconds")
    if not parsed.input and not parsed.playlist:
        parser.error("no input given: pass input files and/or --playlist")

    return parsed
def _is_rising_crossing(data, index):
    """ True when the signal goes from non-positive to positive between index and index + 1 """
    return data[index] <= 0 and data[index + 1] > 0


def extract_granule(filename, count, num, total):
    """ Extracts a granule from the middle of the file

    The file is mixed down to mono and samples quieter than 1% of the peak
    at either end are ignored. A slice of roughly ``count`` samples is then
    taken at a position proportional to ``num / total`` within the
    remaining region. Both ends of the slice are moved to rising zero
    crossings, the slice is scaled to a peak of 1.0, and it is resampled
    to 44100 Hz when the file uses another rate.

    :param[str] filename: Input filename
    :param[int] count: Target sample count (expressed at 44100 Hz)
    :param[int] num: This number in the playlist
    :param[int] total: Total playlist length
    :returns: The granule as a 1-D float array
    :raises ValueError: If the file holds fewer than two samples
    """
    LOGGER.info("Reading %s", filename)
    rate, data = scipy.io.wavfile.read(filename)
    LOGGER.debug("rate=%d len=%d shape=%s(%d)", rate, len(data), data.shape, len(data.shape))

    # get the mono signal
    if len(data.shape) > 1:
        data = numpy.mean(data, axis=1)

    # adjust length based on sample rate
    count = count*rate // 44100

    size = len(data)
    if size < 2:
        # the zero-crossing search always looks at a pair of samples
        raise ValueError("%s is too short to extract a granule from" % filename)
    LOGGER.debug("len(data) = %d (%s)", size, type(data[0]))

    # Work in floating point from here on: negating or taking abs() of the
    # most negative integer PCM sample overflows in the file's own dtype.
    data = data.astype(float)

    # find the beginning and ending of the song
    magnitude = numpy.abs(data)
    maxv = magnitude.max()
    audible = numpy.flatnonzero(magnitude >= maxv/100)
    if audible.size:
        first = int(audible[0])
        last = int(audible[-1])
    else:
        # only reachable when the comparison fails everywhere (NaN samples)
        first, last = 0, size - 1
    LOGGER.debug("first=%d last=%d size=%d", first, last, size)

    # find the granule that's approximately num/total into the song
    # When the audible region is shorter than the granule the span would be
    # negative; clamp it so start never moves before `first` (a negative
    # start would make the slice below wrap around the end of the array).
    span = max(0, last - count - first)
    start = span * num // total + first
    # keep start + 1 a valid index for the crossing test
    start = min(start, size - 2)
    LOGGER.debug("start=%d", start)
    while start > 0 and not _is_rising_crossing(data, start):
        start -= 1

    end = min(size - 2, start + count)

    # find the nearest zero crossing to the granule end
    end_l = end
    while end_l > start and not _is_rising_crossing(data, end_l):
        end_l -= 1
    end_r = end
    while end_r < size - 2 and not _is_rising_crossing(data, end_r):
        end_r += 1
    if end - end_l < end_r - end and end_l > start:
        end = end_l
    else:
        end = end_r

    LOGGER.debug("Grain: %.2f %d - %d (%d)", start/size, start, end, end - start)

    # normalize the amplitude
    # (copy so the in-place division does not write through to `data`)
    grain = data[start:end].copy()
    amplitude = numpy.abs(grain).max() if grain.size else 0.0
    LOGGER.debug("Amplitude: %.1f", amplitude)
    if amplitude > 0:
        grain /= amplitude

    # convert to 44100
    if rate != 44100:
        LOGGER.info("Resampling from %s to %s", rate, 44100)
        if grain.size:
            grain = scipy.signal.resample(grain, len(grain)*44100 // rate)

    return grain
def main():
    """ Command-line entry point

    Collects the input filenames from the positional arguments and the
    optional playlist file (one filename per line), extracts one granule
    per input so that the total is roughly ``--time`` seconds, joins the
    granules in order, and writes the result as a 44100 Hz wav file when
    ``--output`` is given.

    :raises SystemExit: If there are no input files or no target length
    """
    args = parse_args()
    logging.basicConfig(level=LOG_LEVELS[min(
        args.verbosity, len(LOG_LEVELS) - 1)])

    inputs = list(args.input)
    if args.playlist:
        with open(args.playlist) as file:
            # Blank lines (e.g. a trailing newline) are not filenames.
            inputs.extend(line.strip() for line in file if line.strip())

    # Fail with a readable message instead of a ZeroDivisionError/TypeError
    # in the granule-size computation below.
    if not inputs:
        raise SystemExit("granule: no input files given")
    if args.time is None:
        raise SystemExit("granule: a target length (-t/--time) is required")

    total = len(inputs)
    count = 44100 * args.time // total
    LOGGER.info("n=%d count=%d", total, count)

    grains = [extract_granule(infile, count, num, total)
              for num, infile in enumerate(inputs)]
    # One concatenation at the end instead of re-copying the growing output
    # for every appended granule.
    output = numpy.concatenate(grains)

    LOGGER.info("Output file: %d samples (%.2f seconds)", len(output), len(output)/44100)

    if args.output:
        scipy.io.wavfile.write(args.output, 44100, output)


if __name__ == "__main__":
    main()