24c0e8b42bd8 — Chris Cannam 5 years ago
More test materials. There are a number of changes here that probably should not be merged back.
M .hgignore +1 -0
@@ 14,3 14,4 @@ doc/html
 *.rej
 cov-int
 *~
+test/latency/output

          
M main/main.cpp +17 -2
@@ 101,6 101,7 @@ int main(int argc, char **argv)
     bool help = false;
     bool version = false;
     bool quiet = false;
+    bool delaycomp = true;
 
     bool haveRatio = false;
 

          
@@ 150,6 151,7 @@ int main(int argc, char **argv)
             { "threads",       0, 0, '@' },
             { "quiet",         0, 0, 'q' },
             { "timemap",       1, 0, 'M' },
+            { "no-delay-comp", 0, 0, 'Y' },
             { 0, 0, 0, 0 }
         };
 

          
@@ 186,6 188,7 @@ int main(int argc, char **argv)
         case 'c': crispness = atoi(optarg); break;
         case 'q': quiet = true; break;
         case 'M': mapfile = optarg; break;
+        case 'Y': delaycomp = false; break;
         default:  help = true; break;
         }
     }

          
@@ 247,6 250,7 @@ int main(int argc, char **argv)
         cerr << "         --pitch-hq       In RT mode, use a slower, higher quality pitch shift" << endl;
         cerr << "         --centre-focus   Preserve focus of centre material in stereo" << endl;
         cerr << "                          (at a cost in width and individual channel quality)" << endl;
+        cerr << "         --no-delay-comp  Don't compensate for processing delay in the output" << endl;
         cerr << endl;
         cerr << "  -d<N>, --debug <N>      Select debug level (N = 0,1,2,3); default 0, full 3" << endl;
         cerr << "                          (N.B. debug level 3 includes audible ticks in output)" << endl;

          
@@ 446,7 450,14 @@ int main(int argc, char **argv)
 
     ts.setExpectedInputDuration(size_t(sfinfo.frames));
 
-    int drop = int(ts.getLatency());
+    int drop = 0;
+    int delay = int(ts.getLatency());
+    if (debug > 0) {
+        cerr << "reported output delay = " << delay << endl;
+    }
+    if (delaycomp) {
+        drop = delay;
+    }
     
     float *fbuf = new float[channels * ibs];
     float **ibuf = new float *[channels];

          
@@ 525,7 536,7 @@ int main(int argc, char **argv)
         bool final = (frame + ibs >= sfinfo.frames);
 
         if (debug > 2) {
-            cerr << "count = " << count << ", ibs = " << ibs << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl;
+            cerr << "\ncount = " << count << ", ibs = " << ibs << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl;
         }
 
         ts.process(ibuf, count, final);

          
@@ 575,6 586,10 @@ int main(int argc, char **argv)
             cerr << "Pass 2: Processing..." << endl;
         }
 
+        if (debug > 2) {
+            cerr << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", error: " << abs(lrint(countIn * ratio) - int(countOut)) << endl;
+        }
+
 	int p = int((double(frame) * 100.0) / sfinfo.frames);
 	if (p > percent || frame == 0) {
 	    percent = p;

          
M rubberband/RubberBandStretcher.h +1 -1
@@ 415,7 415,7 @@ public:
      * In RealTime mode, the latency may depend on the time and pitch
      * ratio and other options.
      */
-    size_t getLatency() const;
+    int getLatency() const;
 
     /**
      * Change an OptionTransients configuration setting.  This may be

          
M src/RubberBandStretcher.cpp +1 -1
@@ 73,7 73,7 @@ RubberBandStretcher::getPitchScale() con
     return m_d->getPitchScale();
 }
 
-size_t
+int
 RubberBandStretcher::getLatency() const
 {
     return m_d->getLatency();

          
M src/StretcherImpl.cpp +57 -8
@@ 431,6 431,7 @@ RubberBandStretcher::Impl::calculateSize
     double r = getEffectiveRatio();
 
     if (m_realtime) {
+            cerr << "rt" << endl;
 
         if (r < 1) {
             

          
@@ 440,9 441,15 @@ RubberBandStretcher::Impl::calculateSize
             else if (rsb) windowIncrRatio = 4.5;
             else windowIncrRatio = 6;
 
+            if (m_debugLevel > 0) {
+                cerr << "windowIncrRatio = " << windowIncrRatio << endl;
+            }
+
             inputIncrement = int(windowSize / windowIncrRatio);
             outputIncrement = int(floor(inputIncrement * r));
 
+            cerr << "a" << endl;
+            
             // Very long stretch or very low pitch shift
             if (outputIncrement < m_defaultIncrement / 4) {
                 if (outputIncrement < 1) outputIncrement = 1;

          
@@ 462,8 469,16 @@ RubberBandStretcher::Impl::calculateSize
             else if (rsb) windowIncrRatio = 4.5;
             else windowIncrRatio = 8;
 
-            outputIncrement = int(windowSize / windowIncrRatio);
+            if (m_debugLevel > 0) {
+                cerr << "windowIncrRatio = " << windowIncrRatio << endl;
+            }
+            
+//            outputIncrement = int(windowSize / windowIncrRatio);
+            outputIncrement = 256;
             inputIncrement = int(outputIncrement / r);
+
+            cerr << "b" << endl;
+
             while (outputIncrement > 1024 * m_rateMultiple &&
                    inputIncrement > 1) {
                 outputIncrement /= 2;

          
@@ 487,6 502,7 @@ RubberBandStretcher::Impl::calculateSize
         }
 
     } else {
+            cerr << "non-rt" << endl;
 
         if (r < 1) {
             inputIncrement = windowSize / 4;

          
@@ 728,15 744,16 @@ RubberBandStretcher::Impl::configure()
     // start with a swoosh than introduce more latency, and we don't
     // want gaps when the ratio changes.
 
-    if (!m_realtime) {
+//    if (!m_realtime) {
         if (m_debugLevel > 1) {
-            cerr << "Not real time mode: prefilling" << endl;
+//            cerr << "Not real time mode: prefilling" << endl;
+            cerr << "Prefilling with " << m_aWindowSize/2 << " zeros" << endl;
         }
         for (size_t c = 0; c < m_channels; ++c) {
             m_channelData[c]->reset();
             m_channelData[c]->inbuf->zero(m_aWindowSize/2);
         }
-    }
+//    }
 }
 
 

          
@@ 833,13 850,40 @@ RubberBandStretcher::Impl::reconfigure()
     }
 }
 
-size_t
+int
 RubberBandStretcher::Impl::getLatency() const
 {
+    if (!m_realtime) return 0;
+
+    int inIncr = m_increment;
+    int outIncr = int(lrint(m_increment * getEffectiveRatio()));
+
+    // 1.0    2048  512  512  1024  0.5
+    // 1.001  2048  255  255  1023  0.5
+    // 1.2    2048  213  256  790   0.38
+    // 2.0    2048  128  256  1024  0.5
+    // 2.2    2048  116  255  801   0.39
+    // 3.4    2048   75  255  594   0.29
+    
+    
+    // must produce:
+    // 0 when inIncr == 128, outIncr == 256, and m_aWindowSize == 2048
+    // 0 when inIncr == 512, outIncr == 512, and m_aWindowSize == 2048
+
+//    return 0;
     /*
-    if (!m_realtime) return 0;
-    return int((m_aWindowSize/2) / m_pitchScale + 1);
-    */
+    double frac = double(inIncr - outIncr) / outIncr;
+
+    int latency = int(m_aWindowSize * frac);
+    cerr << "latency = " << latency << endl;
+    return latency;
+    */    
+/*    
+    
+    return m_aWindowSize / frac - m_aWindowSize/2;
+*/    
+    return int((m_aWindowSize/2) / m_pitchScale);
+
     return 0;
 }
 

          
@@ 1313,6 1357,11 @@ RubberBandStretcher::Impl::process(const
                 }
 //                cerr << "process: happy with channel " << c << endl;
             }
+            if (m_debugLevel > 2) {
+                cerr << "process: consumed[" << c << "] = " << consumed[c]
+                     << ", samples = " << samples << ", setting allConsumed = "
+                     << allConsumed << endl;
+            }
             if (
 #ifndef NO_THREADING
                 !m_threaded &&

          
M src/StretcherImpl.h +1 -1
@@ 69,7 69,7 @@ public:
     double getTimeRatio() const;
     double getPitchScale() const;
 
-    size_t getLatency() const;
+    int getLatency() const;
 
     void setTransientsOption(Options);
     void setDetectorOption(Options);

          
M src/StretcherProcess.cpp +7 -2
@@ 184,6 184,11 @@ RubberBandStretcher::Impl::consumeChanne
     size_t toWrite = samples;
     size_t writable = inbuf.getWriteSpace();
 
+    if (m_debugLevel > 2) {
+        cerr << "consumeChannel: inbuf has space for " << writable
+             << " samples, we have " << toWrite << endl;
+    }
+    
     bool resampling = resampleBeforeStretching();
 
     const float *input = 0;

          
@@ 455,7 460,7 @@ RubberBandStretcher::Impl::processChunkF
         cerr << "processChunkForChannel: phase reset found, incrs "
              << phaseIncrement << ":" << shiftIncrement << endl;
     }
-
+    
     ChannelData &cd = *m_channelData[c];
 
     if (!cd.draining) {

          
@@ 1051,7 1056,7 @@ RubberBandStretcher::Impl::writeChunk(si
     const int si = shiftIncrement;
 
     if (m_debugLevel > 2) {
-        cerr << "writeChunk(" << channel << ", " << shiftIncrement << ", " << last << ")" << endl;
+        cerr << "writeChunk[" << cd.chunkCount << "](" << channel << ", " << shiftIncrement << ", " << last << ")" << endl;
     }
 
     v_divide(accumulator, windowAccumulator, si);

          
M test/latency/measure.cpp +10 -8
@@ 8,9 8,9 @@ using namespace std;
 
 // Timing measurement for output of non-transient-preserving mode.
 //
-// We know that our file contains two impulses, one near the start (or
-// at least within the first third of the file) and the other toward
-// the end (or at least within the last two-thirds).
+// We know that our file contains three impulses, one in the first
+// quarter of the file, one in the second, and one in the third. (The
+// final quarter is silent.)
 //
 // These impulses are likely to be smeared, so we want to isolate them
 // and find their "middle", i.e. the half-way point between where the

          
@@ 19,7 19,8 @@ using namespace std;
 //
 // Having located the rough middle, we then look for a peak within the
 // area of the middle (the smeared impulse can be asymmetric). This is
-// the peak value within the middle 1/4 of the impulse range.
+// the peak value between slightly before the middle and 3/4 of the
+// way through the impulse region.
 
 int findTransientCentre(const vector<float> &ff, int i0, int i1)
 {

          
@@ 49,7 50,7 @@ int findTransientCentre(const vector<flo
 
     int middle = (i + j) / 2;
 
-    int k0 = i + (j - i) / 4;
+    int k0 = middle - (j - i) / 8;
     int k1 = j - (j - i) / 4;
 
     if (k1 <= k0) {

          
@@ 98,14 99,15 @@ int main(int argc, char **argv)
     vector<float> ff(nf, 0.f);
     sf_readf_float(sndfile, &ff[0], nf);
 
-    // before this we seek the first transient, after it we seek the second:
-    int division = nf/3;
+    int division = nf/4;
 
     int t1 = findTransientCentre(ff, 0, division);
-    int t2 = findTransientCentre(ff, division, nf);
+    int t2 = findTransientCentre(ff, division, division*2);
+    int t3 = findTransientCentre(ff, division*2, nf);
 
     cout << "transient 1 centre @ " << t1 << endl;
     cout << "transient 2 centre @ " << t2 << endl;
+    cout << "transient 3 centre @ " << t3 << endl;
 
     sf_close(sndfile);
     

          
M test/latency/test.sh +40 -13
@@ 4,13 4,21 @@ set -eu
 
 ( cd ../.. ; make )
 
-if [ ! -f in.wav ]; then
-    flac -d in.flac
-fi
+rm -f in.wav
+
+#if [ ! -f in.wav ]; then
+#    flac -d in.flac
+#fi
 
 #sox dirac.wav up.wav pad 100000s 99500s
-#sox -v -0.3 dirac.wav down.wav pad 1000s 995s
+#sox -v -1.0 dirac.wav down.wav pad 1000s 995s
 #sox -m up.wav down.wav testfile.wav
+#cp testfile.wav in.wav
+
+sox dirac.wav 1.wav pad 1000s 
+sox -v -1.0 dirac.wav 2.wav pad 50000s 
+sox dirac.wav 3.wav pad 100000s 50000s
+sox -m 1.wav 2.wav 3.wav in.wav
 
 g++ printpeak.cpp -o printpeak -lsndfile
 g++ measure.cpp -o measure -lsndfile

          
@@ 20,16 28,17 @@ mkdir -p output
 
 (
     
-for timeratio in 0.2 0.4 0.5 0.8 0.999 1.0 1.001 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.95 2.0 2.05 2.1 2.2 3.4 10.0 ; do
 #    for pitchshift in -13 -5 0 5 13 ; do
     for pitchshift in 0 ; do
 	#	for rt in N Y ; do
 	#	for rt in N; do
+#	for rt in Y N; do
 	for rt in Y; do
 #	    for window in L M S ; do
 	    for window in M ; do
 #		for pitchhq in N Y ; do
 		for pitchhq in N ; do
+		    for timeratio in 0.2 0.4 0.5 0.8 0.999 1.0 1.001 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.95 2.0 2.05 2.1 2.2 2.3 2.4 2.5 2.55 2.6 2.7 2.8 2.9 3.0 3.4 4.0 10.0 ; do
 		    echo -n "time $timeratio pitch $pitchshift rt $rt win $window hq $pitchhq -> "
 		    rtopt=""
 		    case $rt in

          
@@ 46,10 55,10 @@ for timeratio in 0.2 0.4 0.5 0.8 0.999 1
 		    esac
 		    outfile="output/t_${timeratio}_${pitchshift}_R=${rt}_W=${window}_P=${pitchhq}.wav"
 		    outdrums="output/d_${timeratio}_${pitchshift}_R=${rt}_W=${window}_P=${pitchhq}.wav"
-		    ../../bin/rubberband $rtopt $winopt $pitchhqopt --no-transients \
+		    ../../bin/rubberband $rtopt $winopt $pitchhqopt --no-delay-comp --no-transients --no-lamination \
 					 --time "$timeratio" \
 					 --pitch "$pitchshift" \
-					 testfile.wav \
+					 in.wav \
 					 "$outfile" -d1 > output/log.txt 2>&1
 		    ../../bin/rubberband $rtopt $winopt $pitchhqopt --no-transients \
 					 --time "$timeratio" \

          
@@ 59,22 68,32 @@ for timeratio in 0.2 0.4 0.5 0.8 0.999 1
 		    fftsize=$(grep 'fft size =' output/log.txt | head -1 | sed 's/^.*fft size = \([0-9]*\).*$/\1/')
 		    inincr=$(grep ', increment =' output/log.txt | head -1 | sed 's/^.*, increment = \([0-9]*\).*$/\1/')
 		    outincr=$(grep 'output increment =' output/log.txt | head -1 | sed 's/^.*output increment = \([0-9]*\).*$/\1/')
+		    delay=$(grep 'reported output delay =' output/log.txt | head -1 | sed 's/^.*output delay = \([0-9]*\).*$/\1/')
 
-		    echo -n "[fftsize $fftsize, in incr $inincr, out incr $outincr] "
+		    echo -n "[fftsize $fftsize, in incr $inincr, out incr $outincr, out delay $delay] "
 
 #		    peak1=$(./printpeak "$outfile" | grep chunk | head -1 | awk '{ print $8; }')
 #		    peak2=$(./printpeak "$outfile" | grep chunk | tail -n +2 | head -1 | awk '{ print $8; }')
+
 		    peak1=$(./measure "$outfile" | grep 'transient 1' | awk '{ print $5; }')
 		    peak2=$(./measure "$outfile" | grep 'transient 2' | awk '{ print $5; }')
+		    peak3=$(./measure "$outfile" | grep 'transient 3' | awk '{ print $5; }')
 		    
 		    exp1=$(echo 1000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
-		    exp2=$(echo 100000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
+		    exp2=$(echo 50000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
+		    exp3=$(echo 100000 "$timeratio" '*' p | dc | sed 's/[.].*$//')
 
+		    exp1=$(($exp1 + $delay))
+		    exp2=$(($exp2 + $delay))
+		    exp3=$(($exp3 + $delay))
+		    
 		    err1=$(($peak1 - $exp1))
 		    err2=$(($peak2 - $exp2))
+		    err3=$(($peak3 - $exp3))
 
 		    abs1=$(echo "$err1" | sed 's/^-//')
 		    abs2=$(echo "$err2" | sed 's/^-//')
+		    abs3=$(echo "$err3" | sed 's/^-//')
 		    
 		    if [ "$abs1" -lt 3 ]; then
 			echo -n "OK ($peak1) "

          
@@ 84,17 103,25 @@ for timeratio in 0.2 0.4 0.5 0.8 0.999 1
 		    fi
 
 		    if [ "$abs2" -lt 3 ]; then
-			echo "OK ($peak2)"
+			echo -n "OK ($peak2) "
 		    else
 			err=$(($peak2 - $exp2))
-			echo "FAIL (exp $exp2, got $peak2, err $err)"
+			echo -n "FAIL (exp $exp2, got $peak2, err $err) "
+		    fi
+
+		    if [ "$abs3" -lt 3 ]; then
+			echo "OK ($peak3)"
+		    else
+			err=$(($peak3 - $exp3))
+			echo "FAIL (exp $exp3, got $peak3, err $err)"
 		    fi
 
 		    rm output/log.txt
-		done
+		    done
+		    echo
 	    done
 	done
     done
 done
-
+    
 ) | tee test.log