M .hgignore +1 -0
@@ 14,3 14,4 @@ doc/html
*.rej
cov-int
*~
+test/latency/output
M main/main.cpp +17 -2
@@ 101,6 101,7 @@ int main(int argc, char **argv)
bool help = false;
bool version = false;
bool quiet = false;
+ bool delaycomp = true;
bool haveRatio = false;
@@ 150,6 151,7 @@ int main(int argc, char **argv)
{ "threads", 0, 0, '@' },
{ "quiet", 0, 0, 'q' },
{ "timemap", 1, 0, 'M' },
+ { "no-delay-comp", 0, 0, 'Y' },
{ 0, 0, 0, 0 }
};
@@ 186,6 188,7 @@ int main(int argc, char **argv)
case 'c': crispness = atoi(optarg); break;
case 'q': quiet = true; break;
case 'M': mapfile = optarg; break;
+ case 'Y': delaycomp = false; break;
default: help = true; break;
}
}
@@ 247,6 250,7 @@ int main(int argc, char **argv)
cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl;
cerr << " --centre-focus Preserve focus of centre material in stereo" << endl;
cerr << " (at a cost in width and individual channel quality)" << endl;
+ cerr << " --no-delay-comp Don't compensate for processing delay in the output" << endl;
cerr << endl;
cerr << " -d<N>, --debug <N> Select debug level (N = 0,1,2,3); default 0, full 3" << endl;
cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl;
@@ 446,7 450,14 @@ int main(int argc, char **argv)
ts.setExpectedInputDuration(size_t(sfinfo.frames));
- int drop = int(ts.getLatency());
+ int drop = 0;
+ int delay = int(ts.getLatency());
+ if (debug > 0) {
+ cerr << "reported output delay = " << delay << endl;
+ }
+ if (delaycomp) {
+ drop = delay;
+ }
float *fbuf = new float[channels * ibs];
float **ibuf = new float *[channels];
@@ 525,7 536,7 @@ int main(int argc, char **argv)
bool final = (frame + ibs >= sfinfo.frames);
if (debug > 2) {
- cerr << "count = " << count << ", ibs = " << ibs << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl;
+ cerr << "\ncount = " << count << ", ibs = " << ibs << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl;
}
ts.process(ibuf, count, final);
@@ 575,6 586,10 @@ int main(int argc, char **argv)
cerr << "Pass 2: Processing..." << endl;
}
+ if (debug > 2) {
+ cerr << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", error: " << abs(lrint(countIn * ratio) - int(countOut)) << endl;
+ }
+
int p = int((double(frame) * 100.0) / sfinfo.frames);
if (p > percent || frame == 0) {
percent = p;
M rubberband/RubberBandStretcher.h +1 -1
@@ 415,7 415,7 @@ public:
* In RealTime mode, the latency may depend on the time and pitch
* ratio and other options.
*/
- size_t getLatency() const;
+ int getLatency() const;
/**
* Change an OptionTransients configuration setting. This may be
M src/RubberBandStretcher.cpp +1 -1
@@ 73,7 73,7 @@ RubberBandStretcher::getPitchScale() con
return m_d->getPitchScale();
}
-size_t
+int
RubberBandStretcher::getLatency() const
{
return m_d->getLatency();
M src/StretcherImpl.cpp +57 -8
@@ 431,6 431,7 @@ RubberBandStretcher::Impl::calculateSize
double r = getEffectiveRatio();
if (m_realtime) {
+ cerr << "rt" << endl;
if (r < 1) {
@@ 440,9 441,15 @@ RubberBandStretcher::Impl::calculateSize
else if (rsb) windowIncrRatio = 4.5;
else windowIncrRatio = 6;
+ if (m_debugLevel > 0) {
+ cerr << "windowIncrRatio = " << windowIncrRatio << endl;
+ }
+
inputIncrement = int(windowSize / windowIncrRatio);
outputIncrement = int(floor(inputIncrement * r));
+ cerr << "a" << endl;
+
// Very long stretch or very low pitch shift
if (outputIncrement < m_defaultIncrement / 4) {
if (outputIncrement < 1) outputIncrement = 1;
@@ 462,8 469,16 @@ RubberBandStretcher::Impl::calculateSize
else if (rsb) windowIncrRatio = 4.5;
else windowIncrRatio = 8;
- outputIncrement = int(windowSize / windowIncrRatio);
+ if (m_debugLevel > 0) {
+ cerr << "windowIncrRatio = " << windowIncrRatio << endl;
+ }
+
+// outputIncrement = int(windowSize / windowIncrRatio);
+ outputIncrement = 256;
inputIncrement = int(outputIncrement / r);
+
+ cerr << "b" << endl;
+
while (outputIncrement > 1024 * m_rateMultiple &&
inputIncrement > 1) {
outputIncrement /= 2;
@@ 487,6 502,7 @@ RubberBandStretcher::Impl::calculateSize
}
} else {
+ cerr << "non-rt" << endl;
if (r < 1) {
inputIncrement = windowSize / 4;
@@ 728,15 744,16 @@ RubberBandStretcher::Impl::configure()
// start with a swoosh than introduce more latency, and we don't
// want gaps when the ratio changes.
- if (!m_realtime) {
+// if (!m_realtime) {
if (m_debugLevel > 1) {
- cerr << "Not real time mode: prefilling" << endl;
+// cerr << "Not real time mode: prefilling" << endl;
+ cerr << "Prefilling with " << m_aWindowSize/2 << " zeros" << endl;
}
for (size_t c = 0; c < m_channels; ++c) {
m_channelData[c]->reset();
m_channelData[c]->inbuf->zero(m_aWindowSize/2);
}
- }
+// }
}
@@ 833,13 850,40 @@ RubberBandStretcher::Impl::reconfigure()
}
}
-size_t
+int
RubberBandStretcher::Impl::getLatency() const
{
+ if (!m_realtime) return 0;
+// Non-realtime mode reports 0 (line above); otherwise the only live return below yields (m_aWindowSize/2) / m_pitchScale converted to int.
+ int inIncr = m_increment;
+ int outIncr = int(lrint(m_increment * getEffectiveRatio()));
+// NOTE(review): table columns look like ratio, m_aWindowSize, inIncr, outIncr, then two unlabelled figures (the last is roughly the fifth / 2048) -- confirm.
+ // 1.0 2048 512 512 1024 0.5
+ // 1.001 2048 255 255 1023 0.5
+ // 1.2 2048 213 256 790 0.38
+ // 2.0 2048 128 256 1024 0.5
+ // 2.2 2048 116 255 801 0.39
+ // 3.4 2048 75 255 594 0.29
+// NOTE(review): inIncr and outIncr are referenced only by the commented-out experiments further down, so they are unused in the live code.
+
+ // must produce:
+ // 0 when inIncr == 128, outIncr == 256, and m_aWindowSize == 2048
+ // 0 when inIncr == 512, outIncr == 512, and m_aWindowSize == 2048
+
+// return 0;
/*
- if (!m_realtime) return 0;
- return int((m_aWindowSize/2) / m_pitchScale + 1);
- */
+ double frac = double(inIncr - outIncr) / outIncr;
+
+ int latency = int(m_aWindowSize * frac);
+ cerr << "latency = " << latency << endl;
+ return latency;
+ */
+/*
+
+ return m_aWindowSize / frac - m_aWindowSize/2;
+*/
+ return int((m_aWindowSize/2) / m_pitchScale);
+// NOTE(review): the return above is unconditional, so the return 0 that follows is unreachable.
return 0;
}
@@ 1313,6 1357,11 @@ RubberBandStretcher::Impl::process(const
}
// cerr << "process: happy with channel " << c << endl;
}
+ if (m_debugLevel > 2) {
+ cerr << "process: consumed[" << c << "] = " << consumed[c]
+ << ", samples = " << samples << ", setting allConsumed = "
+ << allConsumed << endl;
+ }
if (
#ifndef NO_THREADING
!m_threaded &&
M src/StretcherImpl.h +1 -1
@@ 69,7 69,7 @@ public:
double getTimeRatio() const;
double getPitchScale() const;
- size_t getLatency() const;
+ int getLatency() const;
void setTransientsOption(Options);
void setDetectorOption(Options);
M src/StretcherProcess.cpp +7 -2
@@ 184,6 184,11 @@ RubberBandStretcher::Impl::consumeChanne
size_t toWrite = samples;
size_t writable = inbuf.getWriteSpace();
+ if (m_debugLevel > 2) {
+ cerr << "consumeChannel: inbuf has space for " << writable
+ << " samples, we have " << toWrite << endl;
+ }
+
bool resampling = resampleBeforeStretching();
const float *input = 0;
@@ 455,7 460,7 @@ RubberBandStretcher::Impl::processChunkF
cerr << "processChunkForChannel: phase reset found, incrs "
<< phaseIncrement << ":" << shiftIncrement << endl;
}
-
+
ChannelData &cd = *m_channelData[c];
if (!cd.draining) {
@@ 1051,7 1056,7 @@ RubberBandStretcher::Impl::writeChunk(si
const int si = shiftIncrement;
if (m_debugLevel > 2) {
- cerr << "writeChunk(" << channel << ", " << shiftIncrement << ", " << last << ")" << endl;
+ cerr << "writeChunk[" << cd.chunkCount << "](" << channel << ", " << shiftIncrement << ", " << last << ")" << endl;
}
v_divide(accumulator, windowAccumulator, si);
M test/latency/measure.cpp +10 -8
@@ 8,9 8,9 @@ using namespace std;
// Timing measurement for output of non-transient-preserving mode.
//
-// We know that our file contains two impulses, one near the start (or
-// at least within the first third of the file) and the other toward
-// the end (or at least within the last two-thirds).
+// We know that our file contains three impulses, one in the first
+// quarter of the file, one in the second, and one in the third. (The
+// final quarter is silent.)
//
// These impulses are likely to be smeared, so we want to isolate them
// and find their "middle", i.e. the half-way point between where the
@@ 19,7 19,8 @@ using namespace std;
//
// Having located the rough middle, we then look for a peak within the
// area of the middle (the smeared impulse can be asymmetric). This is
-// the peak value within the middle 1/4 of the impulse range.
+// the peak value between 1/8 of the impulse width before the middle
+// and 3/4 of the way through the impulse region.
int findTransientCentre(const vector<float> &ff, int i0, int i1)
{
@@ 49,7 50,7 @@ int findTransientCentre(const vector<flo
int middle = (i + j) / 2;
- int k0 = i + (j - i) / 4;
+ int k0 = middle - (j - i) / 8;
int k1 = j - (j - i) / 4;
if (k1 <= k0) {
@@ 98,14 99,15 @@ int main(int argc, char **argv)
vector<float> ff(nf, 0.f);
sf_readf_float(sndfile, &ff[0], nf);
- // before this we seek the first transient, after it we seek the second:
- int division = nf/3;
+ int division = nf/4;
int t1 = findTransientCentre(ff, 0, division);
- int t2 = findTransientCentre(ff, division, nf);
+ int t2 = findTransientCentre(ff, division, division*2);
+ int t3 = findTransientCentre(ff, division*2, nf);
cout << "transient 1 centre @ " << t1 << endl;
cout << "transient 2 centre @ " << t2 << endl;
+ cout << "transient 3 centre @ " << t3 << endl;
sf_close(sndfile);
M test/latency/test.sh +40 -13
@@ 4,13 4,21 @@ set -eu
( cd ../.. ; make )
-if [ ! -f in.wav ]; then
- flac -d in.flac
-fi
+rm -f in.wav
+
+#if [ ! -f in.wav ]; then
+# flac -d in.flac
+#fi
#sox dirac.wav up.wav pad 100000s 99500s
-#sox -v -0.3 dirac.wav down.wav pad 1000s 995s
+#sox -v -1.0 dirac.wav down.wav pad 1000s 995s
#sox -m up.wav down.wav testfile.wav
+#cp testfile.wav in.wav
+
+sox dirac.wav 1.wav pad 1000s
+sox -v -1.0 dirac.wav 2.wav pad 50000s
+sox dirac.wav 3.wav pad 100000s 50000s
+sox -m 1.wav 2.wav 3.wav in.wav
g++ printpeak.cpp -o printpeak -lsndfile
g++ measure.cpp -o measure -lsndfile
@@ 20,16 28,17 @@ mkdir -p output
(
-for timeratio in 0.2 0.4 0.5 0.8 0.999 1.0 1.001 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.95 2.0 2.05 2.1 2.2 3.4 10.0 ; do
# for pitchshift in -13 -5 0 5 13 ; do
for pitchshift in 0 ; do
# for rt in N Y ; do
# for rt in N; do
+# for rt in Y N; do
for rt in Y; do
# for window in L M S ; do
for window in M ; do
# for pitchhq in N Y ; do
for pitchhq in N ; do
+ for timeratio in 0.2 0.4 0.5 0.8 0.999 1.0 1.001 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.95 2.0 2.05 2.1 2.2 2.3 2.4 2.5 2.55 2.6 2.7 2.8 2.9 3.0 3.4 4.0 10.0 ; do
echo -n "time $timeratio pitch $pitchshift rt $rt win $window hq $pitchhq -> "
rtopt=""
case $rt in
@@ 46,10 55,10 @@ for timeratio in 0.2 0.4 0.5 0.8 0.999 1
esac
outfile="output/t_${timeratio}_${pitchshift}_R=${rt}_W=${window}_P=${pitchhq}.wav"
outdrums="output/d_${timeratio}_${pitchshift}_R=${rt}_W=${window}_P=${pitchhq}.wav"
- ../../bin/rubberband $rtopt $winopt $pitchhqopt --no-transients \
+ ../../bin/rubberband $rtopt $winopt $pitchhqopt --no-delay-comp --no-transients --no-lamination \
--time "$timeratio" \
--pitch "$pitchshift" \
- testfile.wav \
+ in.wav \
"$outfile" -d1 > output/log.txt 2>&1
../../bin/rubberband $rtopt $winopt $pitchhqopt --no-transients \
--time "$timeratio" \
@@ 59,22 68,32 @@ for timeratio in 0.2 0.4 0.5 0.8 0.999 1
fftsize=$(grep 'fft size =' output/log.txt | head -1 | sed 's/^.*fft size = \([0-9]*\).*$/\1/')
inincr=$(grep ', increment =' output/log.txt | head -1 | sed 's/^.*, increment = \([0-9]*\).*$/\1/')
outincr=$(grep 'output increment =' output/log.txt | head -1 | sed 's/^.*output increment = \([0-9]*\).*$/\1/')
+ delay=$(grep 'reported output delay =' output/log.txt | head -1 | sed 's/^.*output delay = \([0-9]*\).*$/\1/')
- echo -n "[fftsize $fftsize, in incr $inincr, out incr $outincr] "
+ echo -n "[fftsize $fftsize, in incr $inincr, out incr $outincr, out delay $delay] "
# peak1=$(./printpeak "$outfile" | grep chunk | head -1 | awk '{ print $8; }')
# peak2=$(./printpeak "$outfile" | grep chunk | tail -n +2 | head -1 | awk '{ print $8; }')
+
peak1=$(./measure "$outfile" | grep 'transient 1' | awk '{ print $5; }')
peak2=$(./measure "$outfile" | grep 'transient 2' | awk '{ print $5; }')
+ peak3=$(./measure "$outfile" | grep 'transient 3' | awk '{ print $5; }')
exp1=$(echo 1000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
- exp2=$(echo 100000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
+ exp2=$(echo 50000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
+ exp3=$(echo 100000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
+ exp1=$(($exp1 + $delay))
+ exp2=$(($exp2 + $delay))
+ exp3=$(($exp3 + $delay))
+
err1=$(($peak1 - $exp1))
err2=$(($peak2 - $exp2))
+ err3=$(($peak3 - $exp3))
abs1=$(echo "$err1" | sed 's/^-//')
abs2=$(echo "$err2" | sed 's/^-//')
+ abs3=$(echo "$err3" | sed 's/^-//')
if [ "$abs1" -lt 3 ]; then
echo -n "OK ($peak1) "
@@ 84,17 103,25 @@ for timeratio in 0.2 0.4 0.5 0.8 0.999 1
fi
if [ "$abs2" -lt 3 ]; then
- echo "OK ($peak2)"
+ echo -n "OK ($peak2) "
else
err=$(($peak2 - $exp2))
- echo "FAIL (exp $exp2, got $peak2, err $err)"
+ echo -n "FAIL (exp $exp2, got $peak2, err $err) "
+ fi
+
+ if [ "$abs3" -lt 3 ]; then
+ echo "OK ($peak3)"
+ else
+ err=$(($peak3 - $exp3))
+ echo "FAIL (exp $exp3, got $peak3, err $err)"
fi
rm output/log.txt
- done
+ done
+ echo
done
done
done
done
-
+
) | tee test.log