From f9c8a7f6aa411a4f9a0d05d692b05a80f412e26b Mon Sep 17 00:00:00 2001 From: EleonoreMizo Date: Tue, 15 Dec 2020 20:09:20 +0100 Subject: [PATCH] Spectrum display improvement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit – Selection of different frequency mappings for the spectrum display, from linear (current default) to logarithmic. – Field added in the preference page to select the frequency mapping – New preference key to remember this setting: Audio/Renderer/Spectrum/FreqCurve – Consistent display with high sampling rates (> 48 kHz) – Fixed time shift with high quality settings --- src/audio_display.cpp | 10 ++ src/audio_renderer_spectrum.cpp | 168 ++++++++++++++++++++------- src/audio_renderer_spectrum.h | 33 ++++++ src/libresrc/default_config.json | 3 +- src/libresrc/osx/default_config.json | 3 +- src/preferences.cpp | 4 + 6 files changed, 179 insertions(+), 42 deletions(-) diff --git a/src/audio_display.cpp b/src/audio_display.cpp index 2041715fd..2756ac3ff 100644 --- a/src/audio_display.cpp +++ b/src/audio_display.cpp @@ -760,6 +760,15 @@ void AudioDisplay::ReloadRenderingSettings() spectrum_width[spectrum_quality], spectrum_distance[spectrum_quality]); + // Frequency curve + int64_t spectrum_freq_curve = OPT_GET("Audio/Renderer/Spectrum/FreqCurve")->GetInt(); + spectrum_freq_curve = mid(0, spectrum_freq_curve, 4); + const float spectrum_fref_pos [] = { 0.001f, 0.125f, 0.333f, 0.425f, 0.999f }; + + audio_spectrum_renderer->set_reference_frequency_position ( + spectrum_fref_pos [spectrum_freq_curve] + ); + audio_renderer_provider = std::move(audio_spectrum_renderer); } else @@ -1229,6 +1238,7 @@ void AudioDisplay::OnAudioOpen(agi::AudioProvider *provider) OPT_SUB("Colour/Audio Display/Spectrum", &AudioDisplay::ReloadRenderingSettings, this), OPT_SUB("Colour/Audio Display/Waveform", &AudioDisplay::ReloadRenderingSettings, this), OPT_SUB("Audio/Renderer/Spectrum/Quality", &AudioDisplay::ReloadRenderingSettings, 
this), + OPT_SUB("Audio/Renderer/Spectrum/FreqCurve", &AudioDisplay::ReloadRenderingSettings, this), }); OnTimingController(); } diff --git a/src/audio_renderer_spectrum.cpp b/src/audio_renderer_spectrum.cpp index 7884f1006..2022a9475 100644 --- a/src/audio_renderer_spectrum.cpp +++ b/src/audio_renderer_spectrum.cpp @@ -100,6 +100,8 @@ AudioSpectrumRenderer::~AudioSpectrumRenderer() void AudioSpectrumRenderer::RecreateCache() { + update_derivation_values (); + #ifdef WITH_FFTW3 if (dft_plan) { @@ -114,7 +116,7 @@ void AudioSpectrumRenderer::RecreateCache() if (provider) { - size_t block_count = (size_t)((provider->GetNumSamples() + (size_t)(1<> derivation_dist); + size_t block_count = (size_t)((provider->GetNumSamples() + ((size_t)1<> derivation_dist); cache = agi::make_unique(block_count, this); #ifdef WITH_FFTW3 @@ -143,20 +145,29 @@ void AudioSpectrumRenderer::OnSetProvider() void AudioSpectrumRenderer::SetResolution(size_t _derivation_size, size_t _derivation_dist) { - if (derivation_dist != _derivation_dist) + if (derivation_dist_user != _derivation_dist) { - derivation_dist = _derivation_dist; - if (cache) - cache->Age(0); + derivation_dist_user = _derivation_dist; + update_derivation_values (); + AgeCache (0); } - if (derivation_size != _derivation_size) + if (derivation_size_user != _derivation_size) { - derivation_size = _derivation_size; + derivation_size_user = _derivation_size; RecreateCache(); } } +void AudioSpectrumRenderer::set_reference_frequency_position (float pos_fref_) +{ + assert (pos_fref_ > 0.f); + assert (pos_fref_ < 1.f); + + pos_fref = pos_fref_; +} + + template void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) { for (size_t si = 0; si < count; ++si) @@ -165,23 +176,56 @@ void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) { } } +void AudioSpectrumRenderer::update_derivation_values () +{ + // Below this sampling rate (Hz), the derivation values are identical to + // the user-provided ones. 
Otherwise, they are scaled according to the + // ratio between the sampling rates. + // The threshold is set at 50 kHz so with standard rates like 48 kHz, + // the values are kept identical, and scaled with higher standard rates + // like 88.2 or 96 kHz. + constexpr float sample_rate_ref = 50000.f; + + derivation_dist = derivation_dist_user; + derivation_size = derivation_size_user; + + if (provider != nullptr) + { + const int sample_rate = provider->GetSampleRate (); + float mult = float (sample_rate) / sample_rate_ref; + while (mult > 1) + { + ++ derivation_dist; + ++ derivation_size; + mult *= 0.5f; + } + } +} + void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) { assert(cache); assert(block); - int64_t first_sample = ((int64_t)block_index) << derivation_dist; - provider->GetAudio(&audio_scratch[0], first_sample, 2 << derivation_size); + int64_t first_sample = (((int64_t)block_index) << derivation_dist) - ((int64_t)1 << derivation_size); + provider->GetAudio(audio_scratch.data(), first_sample, 2 << derivation_size); + + // Because the FFTs used here are unnormalized DFTs, we have to compensate + // the possible length difference between derivation_size used in the + // calculations and its user-provided counterpart. Thus, the display is + // kept independent of the sampling rate. 
+ const float scale_fix = + 1.f / sqrtf (float (1 << (derivation_size - derivation_size_user))); #ifdef WITH_FFTW3 ConvertToFloat(2 << derivation_size, dft_input); fftw_execute(dft_plan); - double scale_factor = 9 / sqrt(2 << (derivation_size + 1)); + double scale_factor = scale_fix * 9 / sqrt(2 << (derivation_size + 1)); fftw_complex *o = dft_output; - for (size_t si = 1< 0; --si) + for (size_t si = (size_t)1< 0; --si) { *block++ = log10( sqrt(o[0][0] * o[0][0] + o[0][1] * o[0][1]) * scale_factor + 1 ); o++; @@ -196,7 +240,7 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) FFT fft; fft.Transform(2< 0; --si) { @@ -211,6 +255,10 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle style) { + // Misc. utility functions + auto floor_int = [] (float val) { return int (floorf (val )); }; + auto round_int = [] (float val) { return int (floorf (val + 0.5f)); }; + if (!cache) return; @@ -231,9 +279,34 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle const AudioColorScheme *pal = &colors[style]; - /// @todo Make minband and maxband configurable - int minband = 0; - int maxband = 1 << derivation_size; + // Sampling rate, in Hz. + const float sample_rate = float (provider->GetSampleRate ()); + + // Number of FFT bins, excluding the "Nyquist" one + const int nbr_bins = 1 << derivation_size; + + // minband and maxband define an half-open range. + int minband = 1; // Starts at 1, we don't care about showing the DC. + int maxband = std::min ( + round_int (nbr_bins * max_freq / (sample_rate * 0.5f)), + nbr_bins + ); + assert (minband < maxband); + + // Precomputes this once, this will be useful for the log curve. 
+ const float scale_log = logf (maxband / minband); + + // Turns the user-specified 1 kHz position into a ratio between the linear + // and logarithmic curves that we can directly use in the following + // calculations. + assert (pos_fref > 0); + assert (pos_fref < 1); + float b_fref = nbr_bins * freq_ref / (sample_rate * 0.5f); + b_fref = mid (1.f, b_fref, float (maxband - 1)); + const float clin = minband + (maxband - minband) * pos_fref; + const float clog = minband * expf (pos_fref * scale_log); + float log_ratio_calc = (b_fref - clin) / (clog - clin); + log_ratio_calc = mid (0.f, log_ratio_calc, 1.f); // ax = absolute x, absolute to the virtual spectrum bitmap for (int ax = start; ax < end; ++ax) @@ -245,36 +318,51 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle // Prepare bitmap writing unsigned char *px = imgdata + (imgheight-1) * stride + (ax - start) * 3; - // Scale up or down vertically? - if (imgheight > 1<= imgdata); - assert(px < imgdata + imgheight*stride); - auto ideal = (double)(y+1.)/imgheight * (maxband-minband) + minband; - float sample1 = power[(int)floor(ideal)+minband]; - float sample2 = power[(int)ceil(ideal)+minband]; - float frac = ideal - floor(ideal); - float val = (1-frac)*sample1 + frac*sample2; - pal->map(val*amplitude_scale, px); - px -= stride; + // Bin index is an interpolation between the linear and log curves. 
+ const float pos_rel = float (y + 1) / float (imgheight); + const float b_lin = minband + pos_rel * (maxband - minband); + const float b_log = minband * expf (pos_rel * scale_log); + bin_nxt = b_lin + log_ratio_calc * (b_log - b_lin); } - } - else - { - // Pick greatest - for (int y = 0; y < imgheight; ++y) + + float val = 0; + + // Interpolate between consecutive bins + if (bin_nxt - bin_prv < 2) { - assert(px >= imgdata); - assert(px < imgdata + imgheight*stride); - int sample1 = std::max(0, maxband * y/imgheight + minband); - int sample2 = std::min((1<map(maxval*amplitude_scale, px); - px -= stride; + const int bin_0 = floor_int (bin_cur); + const int bin_1 = std::min (bin_0 + 1, nbr_bins - 1); + const float frac = bin_cur - float (bin_0); + const float v0 = power [bin_0]; + const float v1 = power [bin_1]; + val = v0 + frac * (v1 - v0); } + + // Pick the greatest bin on the interval + else + { + int bin_inf = floor_int ((bin_prv + bin_cur) * 0.5f); + int bin_sup = floor_int ((bin_cur + bin_nxt) * 0.5f); + bin_inf = std::min (bin_inf, nbr_bins - 2); + bin_sup = std::min (bin_sup, nbr_bins - 1); + assert (bin_inf < bin_sup); + val = *std::max_element (&power [bin_inf], &power [bin_sup]); + } + + pal->map (val * amplitude_scale, px); + + px -= stride; + bin_prv = bin_cur; + bin_cur = bin_nxt; } } diff --git a/src/audio_renderer_spectrum.h b/src/audio_renderer_spectrum.h index d4641f37d..919c2804b 100644 --- a/src/audio_renderer_spectrum.h +++ b/src/audio_renderer_spectrum.h @@ -61,10 +61,34 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider { /// Colour tables used for rendering std::vector colors; + /// User-provided value for derivation_size + size_t derivation_size_user = 0; + + /// User-provided value for derivation_dist + size_t derivation_dist_user = 0; + + /// Maximum audible, displayed frequency. Avoids wasting the display space + /// with ultrasonic content at sampling rates > 40 kHz. 
+ float max_freq = 20000.f; + + /// Relative vertical position of the 1 kHz frequency, in (0 ; 1) open range + /// 0 = bottom of the display zone, 1 = top + /// The actual position, as displayed, is limited by the available mapping + /// curves (linear and log). + /// Values close to 0 will give a linear curve, and close to 1 a log curve. + float pos_fref = 1.0f / 3; + + /// Reference frequency whose vertical position is constant, Hz. + const float freq_ref = 1000.0f; + /// Binary logarithm of number of samples to use in deriving frequency-power data + /// This could differ from the user-provided value because the actual value + /// used in computations may be scaled, depending on the sampling rate. size_t derivation_size = 0; /// Binary logarithm of number of samples between the start of derivations + /// This could differ from the user-provided value because the actual value + /// used in computations may be scaled, depending on the sampling rate. size_t derivation_dist = 0; /// @brief Reset in response to changing audio provider @@ -90,6 +114,9 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider { template void ConvertToFloat(size_t count, T *dest); + /// @brief Updates the derivation_* after a derivation_*_user change. + void update_derivation_values (); + #ifdef WITH_FFTW3 /// FFTW plan data fftw_plan dft_plan = nullptr; @@ -133,6 +160,12 @@ public: /// is specified too large, it will be clamped to the size. void SetResolution(size_t derivation_size, size_t derivation_dist); + /// @brief Set the vertical relative position of the reference frequency (1 kHz) + /// @param pos_fref_ Vertical position of the 1 kHz frequency. Between 0 and 1, boundaries excluded. + /// + /// A value close to 0 gives a linear display, and close to 1 a logarithmic display. 
+ void set_reference_frequency_position (float pos_fref_); + /// @brief Cleans up the cache /// @param max_size Maximum size in bytes for the cache void AgeCache(size_t max_size) override; diff --git a/src/libresrc/default_config.json b/src/libresrc/default_config.json index 318f8d3ee..a55b2cf57 100644 --- a/src/libresrc/default_config.json +++ b/src/libresrc/default_config.json @@ -71,7 +71,8 @@ "Spectrum" : { "Cutoff" : 0, "Memory Max" : 128, - "Quality" : 1 + "Quality" : 1, + "FreqCurve" : 0 } }, "Snap" : { diff --git a/src/libresrc/osx/default_config.json b/src/libresrc/osx/default_config.json index 59f2ed05f..eb4b0ed89 100644 --- a/src/libresrc/osx/default_config.json +++ b/src/libresrc/osx/default_config.json @@ -71,7 +71,8 @@ "Spectrum" : { "Cutoff" : 0, "Memory Max" : 128, - "Quality" : 1 + "Quality" : 1, + "FreqCurve" : 0 } }, "Snap" : { diff --git a/src/preferences.cpp b/src/preferences.cpp index 7850af5f9..08c7495e7 100644 --- a/src/preferences.cpp +++ b/src/preferences.cpp @@ -383,6 +383,10 @@ void Advanced_Audio(wxTreebook *book, Preferences *parent) { wxArrayString sq_choice(4, sq_arr); p->OptionChoice(spectrum, _("Quality"), sq_choice, "Audio/Renderer/Spectrum/Quality"); + const wxString sc_arr[5] = { _("Linear"), _("Extended"), _("Medium"), _("Compressed"), _("Logarithmic") }; + wxArrayString sc_choice(5, sc_arr); + p->OptionChoice(spectrum, _("Frequency mapping"), sc_choice, "Audio/Renderer/Spectrum/FreqCurve"); + p->OptionAdd(spectrum, _("Cache memory max (MB)"), "Audio/Renderer/Spectrum/Memory Max", 2, 1024); #ifdef WITH_AVISYNTH