New spectrum rendering code, scrapped threading and now caches FFT results instead. Also the visual appearance of the selection in spectrum was improved.

Originally committed to SVN as r731.
This commit is contained in:
Niels Martin Hansen 2007-01-07 04:44:11 +00:00
parent d0a7745d41
commit 9babda0720
5 changed files with 465 additions and 240 deletions

View File

@ -66,6 +66,8 @@ AudioDisplay::AudioDisplay(wxWindow *parent,VideoDisplay *display)
video = NULL; video = NULL;
origImage = NULL; origImage = NULL;
spectrumDisplay = NULL; spectrumDisplay = NULL;
spectrumDisplaySelected = NULL;
spectrumRenderer = NULL;
ScrollBar = NULL; ScrollBar = NULL;
dialogue = NULL; dialogue = NULL;
karaoke = NULL; karaoke = NULL;
@ -112,7 +114,9 @@ AudioDisplay::~AudioDisplay() {
delete provider; delete provider;
delete player; delete player;
delete origImage; delete origImage;
delete spectrumRenderer;
delete spectrumDisplay; delete spectrumDisplay;
delete spectrumDisplaySelected;
delete peak; delete peak;
delete min; delete min;
} }
@ -214,11 +218,6 @@ void AudioDisplay::UpdateImage(bool weak) {
// Draw spectrum // Draw spectrum
if (spectrum) { if (spectrum) {
DrawSpectrum(dc,weak); DrawSpectrum(dc,weak);
// Invert the selection, if any
if (hasSel && selStart < selEnd && Options.AsBool(_T("Audio Spectrum invert selection"))) {
dc.Blit(selStart, 0, selEnd-selStart, h, &dc, selStart, 0, wxSRC_INVERT);
}
} }
// Draw seconds boundaries // Draw seconds boundaries
@ -374,13 +373,14 @@ void AudioDisplay::UpdateImage(bool weak) {
} }
} }
catch (...) { catch (...) {
// FIXME?
} }
} }
} }
// Modified text // Modified text
if (NeedCommit) { if (NeedCommit) {
dc.SetFont(wxFont(9,wxDEFAULT,wxFONTSTYLE_NORMAL,wxFONTWEIGHT_BOLD,false,_T("Verdana"))); dc.SetFont(wxFont(9,wxDEFAULT,wxFONTSTYLE_NORMAL,wxFONTWEIGHT_BOLD,false,_T("Verdana"))); // FIXME: hardcoded font name
dc.SetTextForeground(wxColour(255,0,0)); dc.SetTextForeground(wxColour(255,0,0));
if (selStart <= selEnd) { if (selStart <= selEnd) {
dc.DrawText(_T("Modified"),4,4); dc.DrawText(_T("Modified"),4,4);
@ -409,7 +409,7 @@ void AudioDisplay::UpdateImage(bool weak) {
dc.SetPen(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT)); dc.SetPen(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT));
dc.SetTextForeground(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT)); dc.SetTextForeground(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT));
wxFont scaleFont; wxFont scaleFont;
scaleFont.SetFaceName(_T("Tahoma")); scaleFont.SetFaceName(_T("Tahoma")); // FIXME: hardcoded font name
scaleFont.SetPointSize(8); scaleFont.SetPointSize(8);
dc.SetFont(scaleFont); dc.SetFont(scaleFont);
@ -503,259 +503,61 @@ void AudioDisplay::DrawWaveform(wxDC &dc,bool weak) {
} }
static int spectrumColorMap[256];
static unsigned short spectrumColorMap16[256];
static bool colorMapsGenerated = false;
//////////////////////////////////////
// Spectrum analyser rendering thread
class SpectrumRendererThread : public wxThread {
public:
SpectrumRendererThread() : wxThread(wxTHREAD_JOINABLE) {
if (Create() != wxTHREAD_NO_ERROR)
throw _T("Error creating Spectrum rendering thread.");
}
int *data; // image data to write to (shared)
int window; // 1 << Options.AsInt(_T("Audio Spectrum Window"))
int firstbar, lastbar; // first and last vertical bar to draw
int w, h; // width and height of canvas
int cutoff; // cutoff frequency
float *base_in; // audio sample data (shared)
int samples; // number of samples per column
int depth; // display bit depth
float scale; // vertical scale of display, exponential, min=0, mid=1, max=8
protected:
wxThread::ExitCode Entry() {
// Pointers to image data
int *write_ptr = data;
unsigned short *write_ptr16 = (unsigned short *)data;
// FFT output data
float *out_r = new float[window]; // real part
float *out_i = new float[window]; // imaginary part
float *power = new float[window]; // calculated signal power
// Prepare constants
const int halfwindow = window/2;
//const int posThres = MAX(1,int(double(halfwindow-cutoff)/double(h)*0.5/scale + 0.5));
const int maxband = (halfwindow-cutoff) * 2/3;
const float mult = float(h)/float(halfwindow-cutoff)/255.f;
// Calculation loop
for (int i = firstbar; i < lastbar; i++) {
__int64 curStart = i*samples-(window/2);
if (curStart < 0) curStart = 0;
// Position input
float *in = base_in + curStart;
// Perform the FFT
FFT fft;
fft.Transform(window,in,out_r,out_i);
// Position pointer
write_ptr = data+i+h*w;
write_ptr16 = ((unsigned short*)data)+(i+h*w);
// The maximum power output from the FFT
// Derived by maximising the result from the DFT function:
// f[u] = sum(x=0,N-1)[ f(x) * exp(-2 * pi * i * u * x) ]
// Where N is the number of samples transformed.
// = N * 2^(B-1) * exp(-2 * pi * i * u * x)
// Maximising by f(x) constant at maximum sample value.
// B is bit-depth of the samples, so 2^(B-1) is the maximum sample value.
// = N * 2^(B-1) * [ cos(-2*pi*u*x) + i sin(-2*pi*u*x) ]
// Expanding using Euler's formula.
// = N * 2^(B-1) * [ cos(2*pi*u*x) - i sin(2*pi*u*x) ]
// cos(-x) = cos(x) and sin(-x) = -sin(x)
// = N * 2^(B-1) * cos(2*pi*u*x) - N * 2^(B-1) * i sin(2*pi*u*x) [A]
// Expand the bracket.
// Now determine the maximum magnitude of [A], letting u be constant and x variable.
// | N * 2^(B-1) * cos(2*pi*u*x) - N * 2^(B-1) * i sin(2*pi*u*x) |
// = sqrt( [N * 2^(B-1) * cos(2*pi*u*x)]^2 + [N * 2^(B-1) * sin(2*pi*u*x)]^2 )
// = sqrt( N^2 * 4^(B-1) * cos^2(2*pi*u*x) + N^2 * 4^(B-1) * sin^2(2*pi*u*x) )
// = sqrt( N^2 * 4^(B-1) * [ cos^2(2*pi*u*x) + sin^2(2*pi*u*x) ] )
// = sqrt( N^2 * 4^(B-1) )
// It's known that sin^2(x) + cos^2(x) = 1.
// = N * 2^(B-1)
int maxpower = (1 << (16 - 1))*256;
// Calculate the signal power over frequency
#if 0
// Logarithmic scale
for (int j = 0; j < window; j++) {
float t = out_r[j]*out_r[j] + out_i[j]*out_i[j];
if (t < 1)
power[j] = 0;
else
power[j] = 10. * log10(t) * 64; // try changing the constant 64 if playing with this
}
maxpower = 10 * log10((float)maxpower);
#elif 1
// "Compressed" scale
double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3;
double logoverscale = log(maxpower*8*scale - twothirdmaxpower);
for (int j = 0; j < window; j++) {
// First do a simple linear scale power calculation -- 8 gives a reasonable default scaling
power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]) * 8 * scale;
if (power[j] > maxpower * 2/3) {
double p = power[j] - twothirdmaxpower;
p = log(p) * onethirdmaxpower / logoverscale;
power[j] = p + twothirdmaxpower;
}
}
#else
// Linear scale
for (int j = 0; j < window; j++) {
power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]);
}
#endif
#define WRITE_PIXEL \
if (intensity > 255) intensity = 255; \
if (intensity < 0) intensity = 0; \
if (depth == 32) { \
write_ptr -= w; \
*write_ptr = spectrumColorMap[intensity]; \
} \
else if (depth == 16) { \
write_ptr16 -= w; \
*write_ptr16 = spectrumColorMap16[intensity]; \
}
// Decide which rendering algo to use
if (halfwindow-cutoff > h) {
// more than one frequency sample per pixel (vertically compress data)
// pick the largest value per pixel for display
// Iterate over pixels, picking a range of samples for each
for (int j = 0; j < h; j++) {
int sample1 = maxband * j/h + cutoff;
int sample2 = maxband * (j+1)/h + cutoff;
float maxval = 0;
for (int samp = sample1; samp <= sample2; samp++) {
if (power[samp] > maxval) maxval = power[samp];
}
int intensity = int(256 * maxval / maxpower);
WRITE_PIXEL
}
}
else {
// less than one frequency sample per pixel (vertically expand data)
// interpolate between pixels
// can also happen with exactly one sample per pixel, but how often is that?
// Iterate over pixels, picking the nearest power values
for (int j = 0; j < h; j++) {
float ideal = (float)(j+1.)/h * maxband;
float sample1 = power[(int)floor(ideal)+cutoff];
float sample2 = power[(int)ceil(ideal)+cutoff];
float frac = ideal - floor(ideal);
int intensity = int(((1-frac)*sample1 + frac*sample2) / maxpower * 256);
WRITE_PIXEL
}
}
#undef WRITE_PIXEL
}
delete out_r;
delete out_i;
delete power;
return 0;
}
};
////////////////////////// //////////////////////////
// Draw spectrum analyzer // Draw spectrum analyzer
void AudioDisplay::DrawSpectrum(wxDC &finaldc,bool weak) { void AudioDisplay::DrawSpectrum(wxDC &finaldc,bool weak) {
// Spectrum bitmap
if (!weak || !spectrumDisplay || spectrumDisplay->GetWidth() != w || spectrumDisplay->GetHeight() != h) { if (!weak || !spectrumDisplay || spectrumDisplay->GetWidth() != w || spectrumDisplay->GetHeight() != h) {
if (spectrumDisplay) { if (spectrumDisplay) {
delete spectrumDisplay; delete spectrumDisplay;
delete spectrumDisplaySelected;
spectrumDisplay = 0; spectrumDisplay = 0;
spectrumDisplaySelected = 0;
} }
//spectrumDisplay = new wxBitmap(w,h);
weak = false; weak = false;
} }
if (!weak) { if (!weak) {
// Generate colors unsigned char *img = (unsigned char *)malloc(h*w*3); // wxImage requires using malloc
if (!colorMapsGenerated) {
unsigned char r,g,b;
for (int i=0;i<256;i++) {
//hsv_to_rgb(255 - i, 255 - i * 3/10, 255*3/10 + i * 7/10, &r, &g, &b);
hsl_to_rgb(170 + i * 2/3, 128 + i/2, i, &r, &g, &b);
spectrumColorMap[i] = b | (g<<8) | (r<<16);
spectrumColorMap16[i] = ((r>>3)<<11) | ((g>>2)<<5) | b>>3;
}
colorMapsGenerated = true;
}
int depth = wxDisplayDepth();
// Prepare arrays if (!spectrumRenderer)
int cutOff = Options.AsInt(_T("Audio Spectrum Cutoff")); spectrumRenderer = new AudioSpectrum(provider, 1<<Options.AsInt(_T("Audio Spectrum Window")));
int window = 1 << Options.AsInt(_T("Audio Spectrum Window"));
int totalLen = w*samples+window;
float *raw_float = new float[totalLen];
short *raw_int = new short[totalLen];
float *in = raw_float;
// Fill input spectrumRenderer->SetScaling(scale);
__int64 start = Position*samples;
provider->GetAudio(raw_int,start,totalLen);
for (int j=0;j<totalLen;j++) {
raw_float[j] = (float)raw_int[j];
}
delete raw_int;
// For image data // Use a slightly slower, but simple way
int *data = new int[w*h*depth/32]; // Always draw the spectrum for the entire width
// Hack: without those divs by 2 the display is horizontally compressed
spectrumRenderer->RenderRange(Position*samples, (Position+w)*samples, false, img, 0, w, w, h);
////// START OF PARALLELISED CODE ////// // The spectrum bitmap will have been deleted above already, so just make a new one
const int cpu_count = MAX(wxThread::GetCPUCount(), 1); wxImage imgobj(w, h, img, false);
std::vector<SpectrumRendererThread*> threads(cpu_count); spectrumDisplay = new wxBitmap(imgobj);
for (int i = 0; i < cpu_count; i++) { }
// Ugh, way too much data to copy in
threads[i] = new SpectrumRendererThread();
threads[i]->data = data;
threads[i]->window = window;
threads[i]->firstbar = i * w/cpu_count;
threads[i]->lastbar = (i+1) * w/cpu_count;
threads[i]->w = w;
threads[i]->h = h;
threads[i]->cutoff = cutOff;
threads[i]->base_in = raw_float;
threads[i]->samples = samples;
threads[i]->depth = depth;
threads[i]->scale = scale;
threads[i]->Run();
}
// Threads started, wait for them to end
for (int i = 0; i < cpu_count; i++) {
threads[i]->Wait();
delete threads[i];
}
// Clear memory if (hasSel && selStartCap < selEndCap &&
delete raw_float; ((selStartCap > Position && selStartCap < Position+w) ||
(selEndCap > Position && selEndCap < Position+w) ||
// Create image FIXME *BREAKS ON NON-WIN32* (see wx docs) (selStartCap < Position && selEndCap > Position+w)) &&
spectrumDisplay = new wxBitmap((const char*)data,w,h,depth); !spectrumDisplaySelected) {
// There is a visible selection and we don't have a rendered one
// This should be done regardless whether we're "weak" or not
// Assume a few things were already set up when things were first rendered though
unsigned char *img = (unsigned char *)malloc(h*w*3);
spectrumRenderer->RenderRange(Position*samples, (Position+w)*samples, true, img, 0, w, w, h);
wxImage imgobj(w, h, img, false);
spectrumDisplaySelected = new wxBitmap(imgobj);
} }
// Draw // Draw
wxMemoryDC dc; wxMemoryDC dc;
dc.SelectObject(*spectrumDisplay); dc.SelectObject(*spectrumDisplay);
finaldc.Blit(0,0,w,h,&dc,0,0); finaldc.Blit(0,0,w,h,&dc,0,0);
}
if (hasSel && spectrumDisplaySelected && selStartCap < selEndCap) {
dc.SelectObject(*spectrumDisplaySelected);
finaldc.Blit(selStart, 0, selEnd-selStart, h, &dc, selStart, 0);
}
}
////////////////////////// //////////////////////////
// Get selection position // Get selection position
@ -939,8 +741,10 @@ void AudioDisplay::SetFile(wxString file, VideoProvider *vprovider) {
if (player) player->CloseStream(); if (player) player->CloseStream();
delete provider; delete provider;
delete player; delete player;
delete spectrumRenderer;
provider = NULL; provider = NULL;
player = NULL; player = NULL;
spectrumRenderer = NULL;
Reset(); Reset();
loaded = false; loaded = false;

View File

@ -43,6 +43,7 @@
#include <wx/wxprec.h> #include <wx/wxprec.h>
#include "audio_provider.h" #include "audio_provider.h"
#include "audio_player.h" #include "audio_player.h"
#include "audio_spectrum.h"
////////////// //////////////
@ -67,8 +68,11 @@ private:
AssDialogue *dialogue; AssDialogue *dialogue;
VideoDisplay *video; VideoDisplay *video;
AudioSpectrum *spectrumRenderer;
wxBitmap *origImage; wxBitmap *origImage;
wxBitmap *spectrumDisplay; wxBitmap *spectrumDisplay;
wxBitmap *spectrumDisplaySelected;
__int64 PositionSample; __int64 PositionSample;
float scale; float scale;
int samples; int samples;
@ -106,7 +110,7 @@ private:
int scrubLastRate; int scrubLastRate;
void OnPaint(wxPaintEvent &event); void OnPaint(wxPaintEvent &event);
void OnMouseEvent(wxMouseEvent &event); void OnMouseEvent(wxMouseEvent &event);
void OnSize(wxSizeEvent &event); void OnSize(wxSizeEvent &event);
void OnUpdateTimer(wxTimerEvent &event); void OnUpdateTimer(wxTimerEvent &event);
void OnKeyDown(wxKeyEvent &event); void OnKeyDown(wxKeyEvent &event);

327
aegisub/audio_spectrum.cpp Normal file
View File

@ -0,0 +1,327 @@
// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro
// Copyright (c) 2006, 2007, Niels Martin Hansen
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
#include <assert.h>
#include "audio_spectrum.h"
#include "fft.h"
#include "colorspace.h"
#include "options.h"
// Audio spectrum FFT data cache: storage for the static members shared by every cache object

// Line handed out by GetLine() implementations when the requested line is outside their range
AudioSpectrumCache::CacheLine AudioSpectrumCache::null_line;

// Number of power values held in one cache line
unsigned long AudioSpectrumCache::line_length;


// Change the number of power values per cache line.
// The placeholder line is resized to match; any elements added by the resize are zero.
void AudioSpectrumCache::SetLineLength(unsigned long new_length)
{
	null_line.resize(new_length, 0);
	line_length = new_length;
}
// Bottom level FFT cache, holds the actual power data itself.
// Every line in the covered range is calculated when the cache object is constructed.
class FinalSpectrumCache : public AudioSpectrumCache {
private:
	std::vector<CacheLine> data; // one line of power values for each line number in the range
	unsigned long start, length; // number of the first line stored, and how many lines are stored

public:
	// Return line number i, or null_line when i is outside [start, start+length)
	CacheLine& GetLine(unsigned long i)
	{
		// This check ought to be redundant
		if (i >= start && i-start < length)
			return data[i - start];
		else
			return null_line;
	}

	// Read the audio for lines _start .. _start+_length-1 from provider and store
	// the magnitude of each frequency component of every line.
	FinalSpectrumCache(AudioProvider *provider, unsigned long _start, unsigned long _length)
	{
		start = _start;
		length = _length;

		assert(length > 2);

		// First fill the data vector with blanks, one entry per line stored
		data.resize(length, null_line);

		// Number of samples transformed for each line.
		// line_length is half of the number of samples used to calculate a line, since half of the output from
		// a Fourier transform of real data is redundant, and not interesting for the purpose of creating
		// a frequency/power spectrum.
		const unsigned long window = line_length * 2;

		// Start sample number of the next line calculated.
		// Widen before multiplying, so the product is not truncated to the width of unsigned long.
		__int64 sample = (__int64)start * window;

		// Scratch buffers are held in vectors so they are released on every exit path.
		// They are never left empty, so that taking &buffer[0] is always valid.
		const size_t buffer_size = window > 0 ? window : 1;
		std::vector<short> raw_sample_data(buffer_size); // raw sample data
		std::vector<float> sample_data(buffer_size);     // the same samples converted to float
		std::vector<float> out_r(buffer_size);           // real component of the output
		std::vector<float> out_i(buffer_size);           // imaginary component of the output

		FFT fft; // TODO: use FFTW instead?

		for (unsigned long i = 0; i < length; ++i) {
			provider->GetAudio(&raw_sample_data[0], sample, window);
			for (unsigned long j = 0; j < window; ++j) {
				sample_data[j] = (float)raw_sample_data[j];
			}

			fft.Transform(window, &sample_data[0], &out_r[0], &out_i[0]);

			// Only the first half of the output is kept, see above
			CacheLine &line = data[i];
			for (size_t j = 0; j < line_length; ++j) {
				line[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]);
			}

			sample += window;
		}
	}

	virtual ~FinalSpectrumCache()
	{
	}
};
// Non-bottom-level cache, refers to other caches to do the work.
// Sub-caches are created the first time a line inside them is requested.
class IntermediateSpectrumCache : public AudioSpectrumCache {
private:
	std::vector<AudioSpectrumCache*> sub_caches; // owned by this object; null until first needed
	unsigned long start, length, subcache_length; // first line covered, number of lines covered, lines per sub-cache
	bool subcaches_are_final; // whether the sub-caches are FinalSpectrumCache objects
	int depth; // nesting level of this cache; sub-caches get depth+1
	AudioProvider *provider; // audio source passed on to the sub-caches; not deleted here

	// Not copyable: a copy would delete the same sub-caches a second time
	IntermediateSpectrumCache(const IntermediateSpectrumCache&);
	IntermediateSpectrumCache& operator=(const IntermediateSpectrumCache&);

public:
	// Return line number i, creating the sub-cache holding it if needed.
	// Lines outside the range covered by this cache yield null_line.
	CacheLine &GetLine(unsigned long i)
	{
		if (i < start || i-start > length)
			return null_line;

		// Determine which sub-cache this line resides in
		const size_t subcache = (i-start) / subcache_length;
		assert(subcache < sub_caches.size());

		if (!sub_caches[subcache]) {
			const unsigned long sub_start = start + (unsigned long)subcache * subcache_length;
			if (subcaches_are_final) {
				sub_caches[subcache] = new FinalSpectrumCache(provider, sub_start, subcache_length);
			} else {
				sub_caches[subcache] = new IntermediateSpectrumCache(provider, sub_start, subcache_length, depth+1);
			}
		}

		return sub_caches[subcache]->GetLine(i);
	}

	// Create an empty cache covering _length lines beginning at line _start
	IntermediateSpectrumCache(AudioProvider *_provider, unsigned long _start, unsigned long _length, int _depth)
	{
		provider = _provider;
		start = _start;
		length = _length;
		depth = _depth;

		// FIXME: this calculation probably needs tweaking
		int num_subcaches = 1;
		unsigned long tmp = length;
		while (tmp > 0) {
			tmp /= 16;
			num_subcaches *= 2;
		}

		// An empty range leaves num_subcaches at 1; never divide by zero
		const unsigned long divisor = num_subcaches > 1 ? (unsigned long)(num_subcaches-1) : 1;
		subcache_length = length / divisor;

		// Very short ranges would otherwise yield sub-caches of 0, 1 or 2 lines.
		// Zero breaks the division in GetLine, and FinalSpectrumCache requires more than two lines.
		if (subcache_length < 3)
			subcache_length = 3;

		subcaches_are_final = num_subcaches <= 4;

		sub_caches.resize(num_subcaches, 0);
	}

	// Destroy every sub-cache that was created
	virtual ~IntermediateSpectrumCache()
	{
		for (size_t i = 0; i < sub_caches.size(); ++i)
			delete sub_caches[i]; // deleting a null pointer is a no-op
	}
};
// AudioSpectrum

// Prepare for rendering spectrums of the audio delivered by _provider,
// with _line_length frequency components in every line.
AudioSpectrum::AudioSpectrum(AudioProvider *_provider, unsigned long _line_length)
{
	provider = _provider;
	line_length = _line_length;
	power_scale = 1;

	// One line is calculated from line_length*2 samples
	__int64 total_lines = provider->GetNumSamples() / line_length / 2;
	//assert (total_lines < (1<<31)); // hope it fits into 32 bits...
	num_lines = (unsigned long)total_lines;

	AudioSpectrumCache::SetLineLength(line_length);
	cache = new IntermediateSpectrumCache(provider, 0, num_lines, 0);

	minband = Options.AsInt(_T("Audio Spectrum Cutoff"));
	// NOTE(review): this evaluates as line_length - (minband*2/3); confirm that
	// (line_length - minband) * 2/3 was not what was meant.
	maxband = line_length - minband * 2/3; // TODO: make this customisable?

	// Generate colour maps, 256 RGB triplets in each
	for (int i = 0; i < 256; i++) {
		unsigned char *rgb = colours_normal + i*3;
		hsl_to_rgb(170 + i * 2/3, 128 + i/2, i, rgb+0, rgb+1, rgb+2);
	}
	// The palette for selected audio differs only in the third hsl_to_rgb argument
	for (int i = 0; i < 256; i++) {
		unsigned char *rgb = colours_selected + i*3;
		hsl_to_rgb(170 + i * 2/3, 128 + i/2, i*3/4+64, rgb+0, rgb+1, rgb+2);
	}
}
// Release the cache object created by the constructor.
// The audio provider is left alone.
AudioSpectrum::~AudioSpectrum()
{
	delete cache;
}
// Convert a power value to a palette index, always inside 0..255
static inline int SpectrumIntensity(float value, int maxpower)
{
	const float scaled = 256 * value / maxpower;
	if (!(scaled > 0)) // negative, zero or not-a-number
		return 0;
	if (scaled > 255)
		return 255;
	return int(scaled);
}

// Force a frequency band number into the valid index range 0..band_count-1 (band_count must be positive)
static inline int SpectrumClampBand(int band, int band_count)
{
	if (band < 0)
		return 0;
	if (band >= band_count)
		return band_count-1;
	return band;
}

// Render the spectrum for the samples range_start..range_end into a packed 24 bit RGB image.
//   selected  - use the palette for selected audio instead of the normal one
//   img       - image data, 3 bytes per pixel, imgpitch pixels per row, imgheight rows
//   imgleft   - first image column to draw into
//   imgwidth  - number of image columns the range is spread over
// The lowest frequency band shown is drawn on the bottom row of the image.
void AudioSpectrum::RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int imgheight)
{
	// Nothing can be drawn without any frequency bands, and every index below would be invalid
	if (line_length == 0)
		return;

	const unsigned long first_line = (unsigned long)(range_start / line_length / 2);
	const unsigned long last_line = (unsigned long)(range_end / line_length / 2);
	const unsigned long line_count = last_line - first_line + 1;
	const int band_count = (int)line_length;

	// Held in a vector so the buffer is released on every exit path
	std::vector<float> power(line_length);

	const unsigned char *palette = selected ? colours_selected : colours_normal;

	// These depend only on the scaling, not on the line being drawn
	const int maxpower = (1 << (16 - 1))*256;
	const double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3;
	const double logoverscale = log(maxpower*8*power_scale - twothirdmaxpower);

	// More than one frequency sample per pixel row means the data must be compressed vertically
	const bool compress_vertically = maxband - minband > imgheight;

	int last_imgcol_rendered = -1;

	for (unsigned long i = first_line; i <= last_line; ++i) {
		// Handle horizontal compression and don't unneededly re-render columns:
		// when several lines land on the same column, only the first of them is drawn
		const int imgcol = imgleft + imgwidth * (i - first_line) / line_count;
		if (imgcol <= last_imgcol_rendered)
			continue;
		last_imgcol_rendered = imgcol;

		AudioSpectrumCache::CacheLine &line = cache->GetLine(i);

		// Calculate the signal power over frequency
		// "Compressed" scale
		for (unsigned long j = 0; j < line_length; j++) {
			// First do a simple linear scale power calculation -- 8 gives a reasonable default scaling
			power[j] = line[j] * 8 * power_scale;
			if (power[j] > maxpower * 2/3) {
				double p = power[j] - twothirdmaxpower;
				p = log(p) * onethirdmaxpower / logoverscale;
				power[j] = p + twothirdmaxpower;
			}
		}

		// This line fills every column up to where the next line begins
		int next_line_imgcol = imgleft + imgwidth * (i - first_line + 1) / line_count;
		if (next_line_imgcol >= imgpitch)
			next_line_imgcol = imgpitch-1;

		// Every column drawn for this line is identical, so work out each row once and repeat it
		for (int y = 0; y < imgheight; ++y) {
			int intensity;

			if (compress_vertically) {
				// pick the largest value per pixel for display, from the range of samples the pixel covers
				const int sample1 = SpectrumClampBand(maxband * y/imgheight + minband, band_count);
				const int sample2 = SpectrumClampBand(maxband * (y+1)/imgheight + minband, band_count);
				float maxval = 0;
				for (int samp = sample1; samp <= sample2; samp++) {
					if (power[samp] > maxval) maxval = power[samp];
				}
				intensity = SpectrumIntensity(maxval, maxpower);
			}
			else {
				// less than one frequency sample per pixel (vertically expand data)
				// interpolate between the nearest power values
				// can also happen with exactly one sample per pixel, but how often is that?
				const float ideal = (float)(y+1.)/imgheight * maxband;
				const float sample1 = power[SpectrumClampBand((int)floor(ideal)+minband, band_count)];
				const float sample2 = power[SpectrumClampBand((int)ceil(ideal)+minband, band_count)];
				const float frac = ideal - floor(ideal);
				intensity = SpectrumIntensity((1-frac)*sample1 + frac*sample2, maxpower);
			}

			const unsigned char *colour = palette + intensity*3;
			for (int x = imgcol; x <= next_line_imgcol; ++x) {
				unsigned char *pixel = img + ((imgheight-y-1)*imgpitch + x)*3;
				pixel[0] = colour[0];
				pixel[1] = colour[1];
				pixel[2] = colour[2];
			}
		}
	}
}
void AudioSpectrum::SetScaling(float _power_scale)
{
power_scale = _power_scale;
}

89
aegisub/audio_spectrum.h Normal file
View File

@ -0,0 +1,89 @@
// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro
// Copyright (c) 2006, 2007, Niels Martin Hansen
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
#ifndef AUDIO_SPECTRUM_H
#define AUDIO_SPECTRUM_H
#include <wx/wxprec.h>
#include <vector>
#include "audio_provider.h"
// Spectrum cache basically caches the raw result of FFT
class AudioSpectrumCache {
public:
typedef std::vector<float> CacheLine;
virtual CacheLine& GetLine(unsigned long i) = 0;
static void SetLineLength(unsigned long new_length);
virtual ~AudioSpectrumCache() {};
protected:
static CacheLine null_line;
static unsigned long line_length;
};
// Renders spectrum images of the audio from a provider, using a cache of FFT results
class AudioSpectrum {
public:
	// _line_length is the number of frequency components per line
	AudioSpectrum(AudioProvider *_provider, unsigned long _line_length);
	~AudioSpectrum();

	// Draw the spectrum for the samples range_start..range_end into a 24 bit RGB image,
	// using the palette for selected audio when selected is true
	void RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int imgheight);

	// Set the amplification of displayed power
	void SetScaling(float _power_scale);

private:
	// Data provider
	AudioSpectrumCache *cache;

	// Colour palettes, 256 RGB triplets each
	unsigned char colours_normal[256*3];
	unsigned char colours_selected[256*3];

	AudioProvider *provider;

	unsigned long line_length; // number of frequency components per line (half of number of samples)
	unsigned long num_lines; // number of lines needed for the audio
	float power_scale; // amplification of displayed power
	int minband; // smallest frequency band displayed
	int maxband; // largest frequency band displayed
};
#endif

View File

@ -6,14 +6,15 @@ Please visit http://aegisub.net to download latest version
- New Aegisub logo. (AMZ) - New Aegisub logo. (AMZ)
- Automation 4 has replaced Automation 3, see the help file for more details (jfs) - Automation 4 has replaced Automation 3, see the help file for more details (jfs)
o Automation 4 Lua uses Lua 5.1 instead of 5.0, meaning some new language features o Automation 4 Lua uses Lua 5.1 instead of 5.0, meaning some new language features
o It is now possible to write macros that manipulates subtitles directly o It is now possible to write macros that manipulate subtitles directly
o Scripts have full access to the entire subtitle file, not just the "Events" section o Scripts have full access to the entire subtitle file, not just the "Events" section
- Support reading SSA/ASS files with intermixed V4 and V4+ Styles sections (jfs) - Support reading SSA/ASS files with intermixed V4 and V4+ Styles sections (jfs)
- Fixed loading of sections with unexpected cases. (AMZ) - Fixed loading of sections with unexpected cases. (AMZ)
- Changes to Audio Spectrum: (jfs) - Changes to Audio Spectrum: (jfs)
o Calculation/drawing code is now multithreaded, meaning it runs faster on SMP (eg. dual-core) systems o The calculated FFT data are now cached, so things should be faster
o Actual signal power is now more accurately represented
o The palette is changed o The palette is changed
o Rendering now more accurately represents actual signal power o The selection is no longer shown by ugly reverse colour but with a different palette instead
o Use vertical zoom slider to amplify/dampen displayed signal strength (useful for better visualisation of quiet sections, or easier picking out the dominating frequencies in noisy sections) o Use vertical zoom slider to amplify/dampen displayed signal strength (useful for better visualisation of quiet sections, or easier picking out the dominating frequencies in noisy sections)
- Plain-text export (jfs) - Plain-text export (jfs)
- The style of the current line is automatically selected when opening the Style Manager (jfs) - The style of the current line is automatically selected when opening the Style Manager (jfs)