Stereo playback on Linux audio players

After the audio provider rework, adjust the audio players so that they no longer use the int16 mono downmixed audio unless necessary. Furthermore, the pulseaudio-based player now controls the volume directly through pulseaudio instead of by modifying the buffer, which also reduces latency when changing the volume. The entire set of GetAudio functions is quite messy now: after wangqr's audio rework it was split into GetAudio and GetInt16MonoAudio functions, but now volume scaling is also necessary. Really, this should go back to a type constructor based system, with audio players being allowed to choose which properties out of mono / 16 bits / int samples / volume they need.
2025-04-11 22:56:02 +02:00 · 2022-08-18 22:44:19 +02:00 · 2022-08-18 22:44:19 +02:00 · 819d90e9b0
commit 819d90e9b0
parent 144860eb59
7 changed files with 217 additions and 37 deletions
--- a/libaegisub/audio/provider.cpp
+++ b/libaegisub/audio/provider.cpp
@ -119,6 +119,47 @@ void AudioProvider::FillBufferInt16Mono(int16_t* buf, int64_t start, int64_t cou
 	free(buff);
 }

+// This entire file has turned into a mess. For now I'm just following the pattern of the wangqr code, but
+// this should really be restructured entirely again. The original type constructor-based system worked very well - it could
+// just give downmix/conversion control to the players instead.
+/// @brief Fetch audio in the provider's native sample format and scale it by a linear gain
+/// @param buf    Destination buffer, filled by GetAudio() and then scaled in place
+/// @param start  First frame to fetch
+/// @param count  Number of frames to fetch; count * GetChannels() samples are scaled
+/// @param volume Linear gain factor; exactly 1.0 skips the scaling pass
+///
+/// Integer samples saturate at the limits of their type. Converting an out-of-range
+/// floating point value back to an integer type is undefined behaviour, so every integer
+/// width is clamped before the conversion. Floating point samples are scaled without clamping.
+void AudioProvider::GetAudioWithVolume(void *buf, int64_t start, int64_t count, double volume) const {
+	GetAudio(buf, start, count);
+	if (volume == 1.0) return;
+
+	const int64_t n = count * GetChannels();
+
+	// Clamp v into [lo, hi]; both bounds must be exactly representable as double.
+	auto saturate = [](double v, double lo, double hi) {
+		return v < lo ? lo : (v > hi ? hi : v);
+	};
+
+	if (float_samples) {
+		if (bytes_per_sample == sizeof(float)) {
+			float *buff = reinterpret_cast<float *>(buf);
+			for (int64_t i = 0; i < n; ++i)
+				buff[i] = static_cast<float>(buff[i] * volume);
+		} else if (bytes_per_sample == sizeof(double)) {
+			double *buff = reinterpret_cast<double *>(buf);
+			for (int64_t i = 0; i < n; ++i)
+				buff[i] = buff[i] * volume;
+		}
+	}
+	else {
+		if (bytes_per_sample == sizeof(uint8_t)) {
+			// 8-bit samples are unsigned and centred on 128
+			uint8_t *buff = reinterpret_cast<uint8_t *>(buf);
+			for (int64_t i = 0; i < n; ++i)
+				buff[i] = util::mid(0, static_cast<int>(((int) buff[i] - 128) * volume + 128), 0xFF);
+		} else if (bytes_per_sample == sizeof(int16_t)) {
+			int16_t *buff = reinterpret_cast<int16_t *>(buf);
+			for (int64_t i = 0; i < n; ++i)
+				buff[i] = util::mid(-0x8000, static_cast<int>(buff[i] * volume), 0x7FFF);
+		} else if (bytes_per_sample == 3) {
+			// Packed 24-bit samples have no native integer type, so they are
+			// unpacked, scaled and repacked byte by byte.
+			// NOTE(review): assumes little-endian byte order within each sample,
+			// matching the *_LE formats the players request - confirm against the providers.
+			uint8_t *bytes = reinterpret_cast<uint8_t *>(buf);
+			for (int64_t i = 0; i < n; ++i) {
+				uint8_t *p = bytes + i * 3;
+				int32_t sample = p[0] | (p[1] << 8) | (p[2] << 16);
+				if (sample & 0x800000) sample -= 0x1000000; // sign-extend from 24 bits
+				const int32_t scaled = static_cast<int32_t>(saturate(sample * volume, -8388608.0, 8388607.0));
+				// Go through unsigned so the shifts are well defined for negative values
+				const uint32_t bits = static_cast<uint32_t>(scaled);
+				p[0] = static_cast<uint8_t>(bits & 0xFF);
+				p[1] = static_cast<uint8_t>((bits >> 8) & 0xFF);
+				p[2] = static_cast<uint8_t>((bits >> 16) & 0xFF);
+			}
+		} else if (bytes_per_sample == sizeof(int32_t)) {
+			int32_t *buff = reinterpret_cast<int32_t *>(buf);
+			for (int64_t i = 0; i < n; ++i)
+				buff[i] = static_cast<int32_t>(saturate(buff[i] * volume, -2147483648.0, 2147483647.0));
+		} else if (bytes_per_sample == sizeof(int64_t)) {
+			int64_t *buff = reinterpret_cast<int64_t *>(buf);
+			for (int64_t i = 0; i < n; ++i) {
+				const double scaled = buff[i] * volume;
+				// INT64_MAX is not representable as a double (it rounds up to 2^63),
+				// so compare against 2^63 and assign the integer limits directly.
+				if (scaled >= 9223372036854775808.0)
+					buff[i] = INT64_MAX;
+				else if (scaled < -9223372036854775808.0)
+					buff[i] = INT64_MIN;
+				else
+					buff[i] = static_cast<int64_t>(scaled);
+			}
+		}
+	}
+}
+
 void AudioProvider::GetInt16MonoAudioWithVolume(int16_t *buf, int64_t start, int64_t count, double volume) const {
 	GetInt16MonoAudio(buf, start, count);
 	if (volume == 1.0) return;
@ -261,4 +302,4 @@ void SaveAudioClip(AudioProvider const& provider, fs::path const& path, int star
 		out.write(buf);
 	}
 }
-}
+}
--- a/libaegisub/include/libaegisub/audio/provider.h
+++ b/libaegisub/include/libaegisub/audio/provider.h
@ -45,6 +45,7 @@ public:
 	virtual ~AudioProvider() = default;

 	void GetAudio(void *buf, int64_t start, int64_t count) const;
+	void GetAudioWithVolume(void *buf, int64_t start, int64_t count, double volume) const;
 	void GetInt16MonoAudio(int16_t* buf, int64_t start, int64_t count) const;
 	void GetInt16MonoAudioWithVolume(int16_t *buf, int64_t start, int64_t count, double volume) const;

--- a/src/audio_player_alsa.cpp
+++ b/src/audio_player_alsa.cpp
@ -79,6 +79,7 @@ class AlsaPlayer final : public AudioPlayer {
 	std::atomic<double> volume{1.0};
 	int64_t start_position = 0;
 	std::atomic<int64_t> end_position{0};
+	bool fallback_mono16 = false;	// whether to convert to 16 bit mono. FIXME: more flexible conversion

 	std::mutex position_mutex;
 	int64_t last_position = 0;
@ -88,6 +89,8 @@ class AlsaPlayer final : public AudioPlayer {

 	std::thread thread;

+	snd_pcm_format_t GetPCMFormat(const agi::AudioProvider *provider);
+
 	void PlaybackThread();

 	void UpdatePlaybackPosition(snd_pcm_t *pcm, int64_t position)
@ -115,6 +118,36 @@ public:
 	void SetEndPosition(int64_t pos) override;
 };

+/// @brief Pick the ALSA PCM format matching the provider's native sample layout
+/// @param provider Audio provider whose sample type and width are inspected
+/// @return The ALSA format to open the device with
+///
+/// Sets fallback_mono16 and returns SND_PCM_FORMAT_S16_LE when ALSA has no
+/// format for the provider's samples.
+snd_pcm_format_t AlsaPlayer::GetPCMFormat(const agi::AudioProvider *provider) {
+	if (provider->AreSamplesFloat()) {
+		switch (provider->GetBytesPerSample()) {
+			case 4:
+				return SND_PCM_FORMAT_FLOAT_LE;
+			case 8:
+				return SND_PCM_FORMAT_FLOAT64_LE;
+			default:
+				fallback_mono16 = true;
+				return SND_PCM_FORMAT_S16_LE;
+		}
+	} else {
+		switch (provider->GetBytesPerSample()) {
+			case 1:
+				return SND_PCM_FORMAT_U8;
+			case 2:
+				return SND_PCM_FORMAT_S16_LE;
+			case 3:
+				// Frames are sized as channels * 3 bytes, i.e. packed 24-bit samples.
+				// SND_PCM_FORMAT_S24_LE would describe 24 bits stored in a 4-byte word.
+				return SND_PCM_FORMAT_S24_3LE;
+			case 4:
+				return SND_PCM_FORMAT_S32_LE;
+			default:
+				// Also covers 8-byte integers: ALSA has no 64-bit integer PCM format,
+				// and declaring them as S32 would misinterpret the buffer.
+				fallback_mono16 = true;
+				return SND_PCM_FORMAT_S16_LE;
+		}
+	}
+}
+
 void AlsaPlayer::PlaybackThread()
 {
 	std::unique_lock<std::mutex> lock(mutex);
@ -126,24 +159,11 @@ void AlsaPlayer::PlaybackThread()
 	BOOST_SCOPE_EXIT_ALL(&) { snd_pcm_close(pcm); };

 do_setup:
-	snd_pcm_format_t pcm_format;
-	switch (/*provider->GetBytesPerSample()*/ sizeof(int16_t))
-	{
-	case 1:
-		LOG_D("audio/player/alsa") << "format U8";
-		pcm_format = SND_PCM_FORMAT_U8;
-		break;
-	case 2:
-		LOG_D("audio/player/alsa") << "format S16_LE";
-		pcm_format = SND_PCM_FORMAT_S16_LE;
-		break;
-	default:
-		return;
-	}
+	snd_pcm_format_t pcm_format = GetPCMFormat(provider);
 	if (snd_pcm_set_params(pcm,
 	                       pcm_format,
 	                       SND_PCM_ACCESS_RW_INTERLEAVED,
-	                       /*provider->GetChannels()*/ 1,
+	                       fallback_mono16 ? 1 : provider->GetChannels(),
 	                       provider->GetSampleRate(),
 	                       1, // allow resample
 	                       100*1000 // 100 milliseconds latency
@ -151,8 +171,7 @@ do_setup:
 		return;
 	LOG_D("audio/player/alsa") << "set pcm params";

-	//size_t framesize = provider->GetChannels() * provider->GetBytesPerSample();
-	size_t framesize = sizeof(int16_t);
+	size_t framesize = fallback_mono16 ? sizeof(int16_t) : provider->GetChannels() * provider->GetBytesPerSample();

 	while (true)
 	{
@ -176,7 +195,11 @@ do_setup:
 		{
 			auto avail = std::min(snd_pcm_avail(pcm), (snd_pcm_sframes_t)(end_position-position));
 			decode_buffer.resize(avail * framesize);
-			provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), position, avail, volume);
+			if (fallback_mono16) {
+				provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), position, avail, volume);
+			} else {
+				provider->GetAudioWithVolume(decode_buffer.data(), position, avail, volume);
+			}

 			snd_pcm_sframes_t written = 0;
 			while (written <= 0)
@ -236,7 +259,11 @@ do_setup:

 			{
 				decode_buffer.resize(avail * framesize);
-				provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), position, avail, volume);
+				if (fallback_mono16) {
+					provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), position, avail, volume);
+				} else {
+					provider->GetAudioWithVolume(decode_buffer.data(), position, avail, volume);
+				}
 				snd_pcm_sframes_t written = 0;
 				while (written <= 0)
 				{
@ -353,4 +380,4 @@ std::unique_ptr<AudioPlayer> CreateAlsaPlayer(agi::AudioProvider *provider, wxWi
 	return agi::make_unique<AlsaPlayer>(provider);
 }

-#endif // WITH_ALSA
+#endif // WITH_ALSA
--- a/src/audio_player_openal.cpp
+++ b/src/audio_player_openal.cpp
@ -71,6 +71,8 @@ class OpenALPlayer final : public AudioPlayer, wxTimer {
 	float volume = 1.f; ///< Current audio volume
 	ALsizei samplerate; ///< Sample rate of the audio
 	int bpf; ///< Bytes per frame
+	bool fallback_mono16 = false; ///< whether to fall back to int16 mono. FIXME: More flexible conversion
+	int format; ///< AL format (stereo/mono, 8/16 bit)

 	int64_t start_frame = 0; ///< First frame of playbacka
 	int64_t cur_frame = 0; ///< Next frame to write to playback buffers
@ -125,8 +127,39 @@ public:
 OpenALPlayer::OpenALPlayer(agi::AudioProvider *provider)
 : AudioPlayer(provider)
 , samplerate(provider->GetSampleRate())
-, bpf(/*provider->GetChannels() * provider->GetBytesPerSample()*/sizeof(int16_t))
 {
+	switch (provider->GetChannels()) {
+		case 1:
+			switch (provider->GetBytesPerSample()) {
+				case 1:
+					format = AL_FORMAT_MONO8;
+					break;
+				case 2:
+					format = AL_FORMAT_MONO16;
+					break;
+				default:
+					format = AL_FORMAT_MONO16;
+					fallback_mono16 = true;
+			}
+			break;
+		case 2:
+			switch (provider->GetBytesPerSample()) {
+				case 1:
+					format = AL_FORMAT_STEREO8;
+					break;
+				case 2:
+					format = AL_FORMAT_STEREO16;
+					break;
+				default:
+					format = AL_FORMAT_MONO16;
+					fallback_mono16 = true;
+			}
+			break;
+		default:
+			format = AL_FORMAT_MONO16;
+			fallback_mono16 = true;
+	}
+	bpf = fallback_mono16 ? sizeof(int16_t) : provider->GetChannels() * provider->GetBytesPerSample();
 	device = alcOpenDevice(nullptr);
 	if (!device) throw AudioPlayerOpenError("Failed opening default OpenAL device");

@ -239,16 +272,21 @@ void OpenALPlayer::FillBuffers(ALsizei count)
 	for (count = mid(1, count, buffers_free); count > 0; --count) {
 		ALsizei fill_len = mid<ALsizei>(0, decode_buffer.size() / bpf, end_frame - cur_frame);

-		if (fill_len > 0)
+		if (fill_len > 0) {
 			// Get fill_len frames of audio
-			provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), cur_frame, fill_len, volume);
+			if (fallback_mono16) {
+				provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), cur_frame, fill_len, volume);
+			} else {
+				provider->GetAudioWithVolume(decode_buffer.data(), cur_frame, fill_len, volume);
+			}
+		}
 		if ((size_t)fill_len * bpf < decode_buffer.size())
 			// And zerofill the rest
 			memset(&decode_buffer[fill_len * bpf], 0, decode_buffer.size() - fill_len * bpf);

 		cur_frame += fill_len;

-		alBufferData(buffers[buf_first_free], AL_FORMAT_MONO16, &decode_buffer[0], decode_buffer.size(), samplerate);
+		alBufferData(buffers[buf_first_free], format, &decode_buffer[0], decode_buffer.size(), samplerate);
 		alSourceQueueBuffers(source, 1, &buffers[buf_first_free]); // FIXME: collect buffer handles and queue all at once instead of one at a time?
 		buf_first_free = (buf_first_free + 1) % num_buffers;
 		--buffers_free;
@ -308,4 +346,4 @@ std::unique_ptr<AudioPlayer> CreateOpenALPlayer(agi::AudioProvider *provider, wx
 	return agi::make_unique<OpenALPlayer>(provider);
 }

-#endif // WITH_OPENAL
+#endif // WITH_OPENAL
--- a/src/audio_player_portaudio.cpp
+++ b/src/audio_player_portaudio.cpp
@ -64,6 +64,32 @@ static const PaHostApiTypeId pa_host_api_priority[] = {
 };
 static const size_t pa_host_api_priority_count = sizeof(pa_host_api_priority) / sizeof(pa_host_api_priority[0]);

+/// @brief Map the provider's native sample layout onto a PortAudio sample format
+/// @param provider Audio provider whose sample type and width are inspected
+/// @return The matching PortAudio format, or paInt16 when there is none
+///
+/// When no format matches, fallback_mono16 is set to true.
+PaSampleFormat PortAudioPlayer::GetSampleFormat(agi::AudioProvider *provider) {
+	const bool is_float = provider->AreSamplesFloat();
+	const auto width = provider->GetBytesPerSample();
+
+	if (is_float) {
+		// Only 32-bit floats are mapped to a PortAudio format
+		if (width == 4) return paFloat32;
+	} else {
+		if (width == 1) return paUInt8;
+		if (width == 2) return paInt16;
+		if (width == 3) return paInt24;
+		if (width == 4) return paInt32;
+	}
+
+	// Anything else falls back to 16 bit mono
+	fallback_mono16 = true;
+	return paInt16;
+}
+
 PortAudioPlayer::PortAudioPlayer(agi::AudioProvider *provider) : AudioPlayer(provider) {
 	PaError err = Pa_Initialize();

@ -140,8 +166,8 @@ void PortAudioPlayer::OpenStream() {
 		const PaDeviceInfo *device_info = Pa_GetDeviceInfo((*device_ids)[i]);
 		PaStreamParameters pa_output_p;
 		pa_output_p.device = (*device_ids)[i];
-		pa_output_p.channelCount = /*provider->GetChannels()*/ 1;
-		pa_output_p.sampleFormat = paInt16;
+		pa_output_p.sampleFormat = GetSampleFormat(provider);
+		pa_output_p.channelCount = fallback_mono16 ? 1 : provider->GetChannels();
 		pa_output_p.suggestedLatency = device_info->defaultLowOutputLatency;
 		pa_output_p.hostApiSpecificStreamInfo = nullptr;

@ -222,7 +248,11 @@ int PortAudioPlayer::paCallback(const void *inputBuffer, void *outputBuffer,

 	// Play something
 	if (lenAvailable > 0) {
-		player->provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(outputBuffer), player->current, lenAvailable, player->GetVolume());
+		if (player->fallback_mono16) {
+			player->provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(outputBuffer), player->current, lenAvailable, player->GetVolume());
+		} else {
+			player->provider->GetAudioWithVolume(outputBuffer, player->current, lenAvailable, player->GetVolume());
+		}

 		// Set play position
 		player->current += lenAvailable;
@ -283,4 +313,4 @@ std::unique_ptr<AudioPlayer> CreatePortAudioPlayer(agi::AudioProvider *provider,
 	return agi::make_unique<PortAudioPlayer>(provider);
 }

-#endif // WITH_PORTAUDIO
+#endif // WITH_PORTAUDIO
--- a/src/audio_player_portaudio.h
+++ b/src/audio_player_portaudio.h
@ -64,6 +64,7 @@ class PortAudioPlayer final : public AudioPlayer {
 	PaTime pa_start;     ///< PortAudio internal start position

 	PaStream *stream = nullptr; ///< PortAudio stream
+	bool fallback_mono16 = false; 	///< whether to fall back to 16 bit mono

 	/// @brief PortAudio callback, used to fill buffer for playback, and prime the playback buffer.
 	/// @param inputBuffer     Input buffer.
@ -87,6 +88,8 @@ class PortAudioPlayer final : public AudioPlayer {
 	/// @param userData Local data to be handed to the callback.
 	static void paStreamFinishedCallback(void *userData);

+	PaSampleFormat GetSampleFormat(agi::AudioProvider *provider);
+
 	/// Gather the list of output devices supported by a host API
 	/// @param host_idx Host API ID
 	void GatherDevices(PaHostApiIndex host_idx);
--- a/src/audio_player_pulse.cpp
+++ b/src/audio_player_pulse.cpp
@ -48,7 +48,7 @@

 namespace {
 class PulseAudioPlayer final : public AudioPlayer {
-	float volume = 1.f;
+	pa_cvolume volume;
 	bool is_playing = false;

 	volatile unsigned long start_frame = 0;
@ -56,6 +56,7 @@ class PulseAudioPlayer final : public AudioPlayer {
 	volatile unsigned long end_frame = 0;

 	unsigned long bpf = 0; // bytes per frame
+	bool fallback_mono16 = false;	// whether to convert to 16 bit mono. FIXME: more flexible conversion

 	wxSemaphore context_notify{0, 1};
 	wxSemaphore stream_notify{0, 1};
@ -73,6 +74,7 @@ class PulseAudioPlayer final : public AudioPlayer {

 	int paerror = 0;

+	static void pa_setvolume_success(pa_context *c, int success, PulseAudioPlayer *thread);
 	/// Called by PA to notify about other context-related stuff
 	static void pa_context_notify(pa_context *c, PulseAudioPlayer *thread);
 	/// Called by PA when a stream operation completes
@ -82,6 +84,8 @@ class PulseAudioPlayer final : public AudioPlayer {
 	/// Called by PA to notify about other stream-related stuff
 	static void pa_stream_notify(pa_stream *p, PulseAudioPlayer *thread);

+	/// Find the sample format and set fallback_mono16 if necessary
+	pa_sample_format_t GetSampleFormat(const agi::AudioProvider *provider);
 public:
 	PulseAudioPlayer(agi::AudioProvider *provider);
 	~PulseAudioPlayer();
@ -94,9 +98,35 @@ public:
 	int64_t GetCurrentPosition();
 	void SetEndPosition(int64_t pos);

-	void SetVolume(double vol) { volume = vol; }
+	void SetVolume(double vol);
 };

+/// Translate the provider's sample type and width into a PulseAudio sample format.
+/// If there is no match, fallback_mono16 is set and PA_SAMPLE_S16LE is returned.
+pa_sample_format_t PulseAudioPlayer::GetSampleFormat(const agi::AudioProvider *provider) {
+	const bool float_input = provider->AreSamplesFloat();
+	const auto sample_bytes = provider->GetBytesPerSample();
+
+	if (float_input && sample_bytes == 4)
+		return PA_SAMPLE_FLOAT32LE;
+
+	if (!float_input) {
+		switch (sample_bytes) {
+			case 1: return PA_SAMPLE_U8;
+			case 2: return PA_SAMPLE_S16LE;
+			case 3: return PA_SAMPLE_S24LE;
+			case 4: return PA_SAMPLE_S32LE;
+			default: break;
+		}
+	}
+
+	// Everything else is played back as 16 bit mono
+	fallback_mono16 = true;
+	return PA_SAMPLE_S16LE;
+}
+
 PulseAudioPlayer::PulseAudioPlayer(agi::AudioProvider *provider) : AudioPlayer(provider) {
 	// Initialise a mainloop
 	mainloop = pa_threaded_mainloop_new();
@ -133,13 +163,14 @@ PulseAudioPlayer::PulseAudioPlayer(agi::AudioProvider *provider) : AudioPlayer(p
 	}

 	// Set up stream
-	bpf = /*provider->GetChannels() * provider->GetBytesPerSample()*/sizeof(int16_t);
 	pa_sample_spec ss;
-	ss.format = PA_SAMPLE_S16LE; // FIXME
+	ss.format = GetSampleFormat(provider);
+	bpf = fallback_mono16 ? sizeof(int16_t) : provider->GetChannels() * provider->GetBytesPerSample();
 	ss.rate = provider->GetSampleRate();
-	ss.channels = /*provider->GetChannels()*/1;
+	ss.channels = fallback_mono16 ? 1 : provider->GetChannels();
 	pa_channel_map map;
 	pa_channel_map_init_auto(&map, ss.channels, PA_CHANNEL_MAP_DEFAULT);
+	pa_cvolume_init(&volume);

 	stream = pa_stream_new(context, "Sound", &ss, &map);
 	if (!stream) {
@ -269,6 +300,11 @@ int64_t PulseAudioPlayer::GetCurrentPosition()
 	return start_frame + playtime * provider->GetSampleRate() / (1000*1000);
 }

+/// @brief Set the playback volume through PulseAudio instead of scaling the sample buffer
+/// @param vol Linear volume factor, converted with pa_sw_volume_from_linear()
+///
+/// The same volume is applied to every channel of the stream (one channel in
+/// the mono16 fallback, otherwise the provider's channel count).
+void PulseAudioPlayer::SetVolume(double vol) {
+	pa_cvolume_set(&volume, fallback_mono16 ? 1 : provider->GetChannels(), pa_sw_volume_from_linear(vol));
+
+	// The context is driven by the threaded mainloop, so it must only be used
+	// while holding the mainloop lock.
+	// NOTE(review): assumes SetVolume is never called from a mainloop callback,
+	// where taking the lock is not allowed - confirm against the callers.
+	pa_threaded_mainloop_lock(mainloop);
+	pa_operation *op = pa_context_set_sink_input_volume(context, pa_stream_get_index(stream), &volume, nullptr, nullptr);
+	// No completion callback is registered, so release our reference right away;
+	// otherwise every volume change leaks an operation object.
+	if (op) pa_operation_unref(op);
+	pa_threaded_mainloop_unlock(mainloop);
+}
+
 /// @brief Called by PA to notify about other context-related stuff
 void PulseAudioPlayer::pa_context_notify(pa_context *c, PulseAudioPlayer *thread)
 {
@ -308,7 +344,11 @@ void PulseAudioPlayer::pa_stream_write(pa_stream *p, size_t length, PulseAudioPl
 	unsigned long maxframes = thread->end_frame - thread->cur_frame;
 	if (frames > maxframes) frames = maxframes;
 	void *buf = malloc(frames * bpf);
-	thread->provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(buf), thread->cur_frame, frames, thread->volume);
+	if (thread->fallback_mono16) {
+		thread->provider->GetInt16MonoAudio(reinterpret_cast<int16_t *>(buf), thread->cur_frame, frames);
+	} else {
+		thread->provider->GetAudio(buf, thread->cur_frame, frames);
+	}
 	::pa_stream_write(p, buf, frames*bpf, free, 0, PA_SEEK_RELATIVE);
 	thread->cur_frame += frames;
 }
@ -324,4 +364,4 @@ void PulseAudioPlayer::pa_stream_notify(pa_stream *p, PulseAudioPlayer *thread)
 std::unique_ptr<AudioPlayer> CreatePulseAudioPlayer(agi::AudioProvider *provider, wxWindow *) {
 	return agi::make_unique<PulseAudioPlayer>(provider);
 }
-#endif // WITH_LIBPULSE
+#endif // WITH_LIBPULSE