using Concentus;
using Concentus.Enums;
using System;

/*
 * Opus encoding and decoding are VERY important for any real-world use of UniVoice, as without
 * encoding the size of audio data is much (over 10x) larger.
 * For more info see https://www.github.com/adrenak/concentus-unity
 */
namespace Adrenak.UniVoice.Filters {
    /// <summary>
    /// A filter that encodes audio using Opus. Use this as an output filter
    /// to reduce the size of outgoing client audio.
    /// </summary>
    public class ConcentusEncodeFilter : IAudioFilter {
        /// <summary>Sentinel bitrate value that switches the encoder to variable bitrate.</summary>
        const int VariableBitrate = -1;
        const int MinBitrate = 16000;
        const int MaxBitrate = 256000;

        // Run() derives the sample count by dividing the byte length by this value,
        // i.e. it assumes input.samples holds 4-byte samples.
        const int BytesPerSample = 4;

        /// <summary>The frequency the encoder runs at.</summary>
        public ConcentusFrequencies SamplingFrequency { get; private set; }

        IOpusEncoder encoder;
        IResampler resampler;

        // Reused across calls to avoid allocating per frame.
        readonly byte[] encodeBuffer;
        float[] resampleBuffer;

        // Properties of the frame currently being processed; refreshed on every Run().
        int inputDuration;   // in whole milliseconds
        int inputChannels;
        int inputFrequency;

        // Channel count the current resampler was created with.
        int resamplerChannelCount;

        readonly int resamplerQuality;
        readonly int encoderComplexity;
        readonly int encoderBitrate;

        /// <summary>
        /// Creates a Concentus encode filter.
        /// </summary>
        /// <param name="encodeFrequency">
        /// The frequency the encoder runs at.
        /// If the input audio frequency is different from this value, it will be resampled before encode.
        /// </param>
        /// <param name="resamplerQuality">Resampler quality [1, 10]. Values outside the range are clamped.</param>
        /// <param name="encoderComplexity">Encoder complexity [1, 10]. Values outside the range are clamped.</param>
        /// <param name="encoderBitrate">
        /// Encoder bitrate [16000, 256000]. Values outside the range are clamped.
        /// Set to -1 to enable variable bitrate.
        /// </param>
        /// <param name="encodeBufferLength">
        /// The length of the encode buffer. Default is 46080 to fit a large sample
        /// with frequency 48000, duration 120ms and 2 channels. This should be enough
        /// for almost all scenarios. Increase if you need more.
        /// </param>
        public ConcentusEncodeFilter(
            ConcentusFrequencies encodeFrequency = ConcentusFrequencies.Frequency_16000,
            int resamplerQuality = 2,
            int encoderComplexity = 3,
            int encoderBitrate = 64000,
            int encodeBufferLength = 46080
        ) {
            SamplingFrequency = encodeFrequency;
            this.resamplerQuality = Math.Clamp(resamplerQuality, 1, 10);
            this.encoderComplexity = Math.Clamp(encoderComplexity, 1, 10);

            // The -1 sentinel must bypass the clamp, otherwise it is turned into
            // MinBitrate and variable bitrate can never be selected.
            this.encoderBitrate = encoderBitrate == VariableBitrate
                ? VariableBitrate
                : Math.Clamp(encoderBitrate, MinBitrate, MaxBitrate);

            encodeBuffer = new byte[encodeBufferLength];
        }

        /// <summary>
        /// Encodes the given frame, resampling it first when its frequency differs
        /// from <see cref="SamplingFrequency"/>.
        /// </summary>
        /// <param name="input">The frame to encode.</param>
        /// <returns>
        /// A frame carrying the encoded bytes, the input timestamp and channel count, and
        /// <see cref="SamplingFrequency"/> as its frequency. The samples array is empty when
        /// the input cannot be encoded or the encoder produced no data.
        /// </returns>
        public AudioFrame Run(AudioFrame input) {
            var rawSamples = input.samples;

            // Reject input that would divide by zero below or give the encoder nothing to work on.
            if (rawSamples == null || rawSamples.Length == 0
                || input.frequency <= 0 || input.channelCount <= 0) {
                return CreateOutputFrame(input, Array.Empty<byte>());
            }

            int durationMs = ((rawSamples.Length / BytesPerSample) * 1000)
                / (input.frequency * input.channelCount);

            // Less than one millisecond of audio yields a zero-length frame,
            // which cannot be encoded.
            if (durationMs <= 0) {
                return CreateOutputFrame(input, Array.Empty<byte>());
            }

            inputChannels = input.channelCount;
            inputFrequency = input.frequency;
            inputDuration = durationMs;

            CreateNewResamplerAndEncoderIfNeeded();

            float[] pcm = Utils.Bytes.BytesToFloats(rawSamples);
            Span<float> toEncode = pcm;
            if (inputFrequency != (int)SamplingFrequency) {
                toEncode = Resample(pcm);
            }

            int encodedLength = Encode(toEncode, out Span<byte> encoded);

            // 'encoded' is a view over the reused encodeBuffer, so it is copied
            // before being handed to the caller.
            byte[] payload = encodedLength > 0 ? encoded.ToArray() : Array.Empty<byte>();
            return CreateOutputFrame(input, payload);
        }

        /// <summary>
        /// Builds the output frame for <paramref name="input"/> carrying <paramref name="payload"/>.
        /// </summary>
        AudioFrame CreateOutputFrame(AudioFrame input, byte[] payload) {
            return new AudioFrame {
                timestamp = input.timestamp,
                channelCount = input.channelCount,
                samples = payload,
                frequency = (int)SamplingFrequency
            };
        }

        /// <summary>
        /// Makes sure the resample buffer, the resampler and the encoder match the
        /// current input frame, recreating whichever of them is missing or out of date.
        /// </summary>
        void CreateNewResamplerAndEncoderIfNeeded() {
            int outputFrequency = (int)SamplingFrequency;

            // The resample buffer holds exactly one frame at the output frequency.
            int resampleLength = outputFrequency * inputDuration * inputChannels / 1000;
            if (resampleBuffer == null || resampleBuffer.Length != resampleLength) {
                resampleBuffer = new float[resampleLength];
            }

            bool resamplerIsStale = resampler == null || resamplerChannelCount != inputChannels;
            if (!resamplerIsStale) {
                resampler.GetRates(out int inRate, out int outRate);
                resamplerIsStale = inRate != inputFrequency || outRate != outputFrequency;
            }

            if (resamplerIsStale) {
                resampler?.Dispose();
                resamplerChannelCount = inputChannels;
                resampler = ResamplerFactory.CreateResampler(
                    inputChannels, inputFrequency, outputFrequency, resamplerQuality);
            }

            if (encoder == null
                || encoder.SampleRate != outputFrequency
                || encoder.NumChannels != inputChannels) {
                encoder?.Dispose();
                encoder = OpusCodecFactory.CreateEncoder(
                    outputFrequency, inputChannels, OpusApplication.OPUS_APPLICATION_VOIP);
                encoder.Complexity = encoderComplexity;

                if (encoderBitrate == VariableBitrate) {
                    encoder.UseVBR = true;
                }
                else {
                    encoder.UseVBR = false;
                    encoder.Bitrate = encoderBitrate;
                }
            }
        }

        /// <summary>
        /// Resamples interleaved samples from the input frequency to <see cref="SamplingFrequency"/>.
        /// </summary>
        /// <param name="samples">Interleaved input samples.</param>
        /// <returns>A view over the resample buffer limited to the samples that were written.</returns>
        Span<float> Resample(Span<float> samples) {
            int inputLength = samples.Length / inputChannels;                   // Input samples per channel
            int outputLength = (int)SamplingFrequency * inputDuration / 1000;   // Output samples per channel

            // Both lengths are passed by ref; outputLength is read back afterwards
            // to trim the result to what the resampler reported.
            resampler.ProcessInterleaved(samples, ref inputLength, resampleBuffer, ref outputLength);

            return resampleBuffer.AsSpan(0, outputLength * inputChannels);
        }

        /// <summary>
        /// Encodes one frame of interleaved samples into the preallocated encode buffer.
        /// </summary>
        /// <param name="toEncode">Interleaved samples at <see cref="SamplingFrequency"/>.</param>
        /// <param name="encoded">
        /// A view over the encode buffer limited to the encoded bytes, or an empty span
        /// when nothing was encoded.
        /// </param>
        /// <returns>
        /// The value returned by the encoder, or 0 when <paramref name="toEncode"/> holds
        /// fewer samples than a full frame.
        /// </returns>
        int Encode(Span<float> toEncode, out Span<byte> encoded) {
            int frameSize = (int)SamplingFrequency * inputDuration / 1000;  // Samples per channel
            int totalSamples = frameSize * inputChannels;                   // Total interleaved samples

            // Not enough samples for a full frame (e.g. the resampler produced fewer than expected).
            if (toEncode.Length < totalSamples) {
                encoded = Span<byte>.Empty;
                return 0;
            }

            int result = encoder.Encode(
                toEncode.Slice(0, totalSamples), frameSize, encodeBuffer, encodeBuffer.Length);

            encoded = result > 0
                ? encodeBuffer.AsSpan(0, result)    // Trim to the actual encoded size
                : Span<byte>.Empty;

            return result;
        }
    }
}