参与对比工具:
1分半音频解码耗时如下:
| 测试方式 | 初始化耗时(ms) | 转码耗时(ms) |
| --- | --- | --- |
| java版opus实现 | 1 | 646 |
| jni+libopus 1.1 | 2598 | 349 |
| jni+libopus 1.3 | 未测试 | 未测试 |
64分钟音频解码耗时如下:
| 测试方式 | 初始化耗时(ms) | 转码耗时(ms) |
| --- | --- | --- |
| java版opus实现 | 40 | 7233 |
| jni+libopus 1.1 | 708 | 7848 |
| jni+libopus 1.3 | 380 | 7356 |
在工程化的时候,使用jni的方式只会加载一次,所以我把初始化的耗时和转码耗时区分出来了。至于1分半音频的初始化耗时和64分钟音频的初始化耗时不一致,没有具体去分析。转码耗时其实差别也不算大。
对比两种方式解码出来的波形图差异(图一为libopus,图二为java-opus):


波形图整体差异不大,大概是不同语言的浮点运算差异导致的。
结论:基于项目的可移植性考虑, Concentus 一定是更胜一筹的,不用考虑在本地开发和服务器上so的差异(实际上 club.minnced:opus-java把几个常用平台的so、dll库封装,系统兼容也没有那么复杂)。
使用Concentus解码代码示例:
<dependency>
<groupId>org.concentus</groupId>
<artifactId>Concentus</artifactId>
<version>1.0-RELEASE</version>
</dependency>
public static long opusToWav(String sourceFilePath, String outputFilePath) {
log.info("调用封包opus转wav格式, 入参音频路径[{}], 出参路径[{}]", sourceFilePath, outputFilePath);
FileInputStream fileInputStream = null;
FileOutputStream fileOutputStream = null;
try {
// 估算音频长度, 并尝试写入wav头
long sourceFileSize = FileUtil.size(new File(sourceFilePath));
// 并非严格计算, 已知80字节按照16000解码出320的数据,故x8得到完整数据大小
long decodedFileSize = sourceFileSize / SH_OPUS_AUDIO_PACKAGE_SIZE * SH_OPUS_AUDIO_FRAME_SIZE * 8;
log.info("估算解码后wav文件大小 [{}]", decodedFileSize);
byte[] wavHeader = WavHeader.bytes(decodedFileSize, SH_OPUS_AUDIO_CHANNEL, SH_OPUS_AUDIO_SAMPLE, 16);
// 输入文件流
fileInputStream = new FileInputStream(sourceFilePath);// 输出文件流
new FileOutputStream(outputFilePath);
// 先写入44字节wav头信息
fileOutputStream.write(wavHeader);
// 双猴默认参数
OpusDecoder decoder = new OpusDecoder(SH_OPUS_AUDIO_SAMPLE, SH_OPUS_AUDIO_CHANNEL);
byte[] bytes = new byte[SH_OPUS_AUDIO_PACKAGE_SIZE];
int samples = 0;
while (fileInputStream.read(bytes) != -1) {
byte[] opusBytes = new byte[SH_OPUS_AUDIO_FRAME_SIZE];
int length = bytes.length;
// 封包前两个字节去掉, 生效的是opus的package, 封包格式按照约束的16k 单声道
System.arraycopy(bytes, SH_OPUS_AUDIO_PACKAGE_SIZE - SH_OPUS_AUDIO_FRAME_SIZE, opusBytes, 0,
length - (SH_OPUS_AUDIO_PACKAGE_SIZE - SH_OPUS_AUDIO_FRAME_SIZE));
byte[] dataPacket = new byte[SH_OPUS_AUDIO_SAMPLE];
int samplesDecoded =
decoder.decode(opusBytes, 0, opusBytes.length, dataPacket, 0,
SH_OPUS_AUDIO_SAMPLE / 2, false);
fileOutputStream.write(dataPacket, 0, samplesDecoded * 2);
samples += samplesDecoded;
}
return samples * 1000 / SH_OPUS_AUDIO_SAMPLE;
} catch (IOException | OpusException e) {
log.error("转码失败, 入参音频路径[{}], 出参路径[{}]", sourceFilePath, outputFilePath, e);
return 0;
} finally {
IoUtil.close(fileInputStream);
IoUtil.close(fileOutputStream);
}
package com.aicloud.listen.util;
import cn.hutool.core.io.IoUtil;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;
@Slf4j
public class WavHeader {
private static final int CHANNEL_NUMS = 2;
private static final int SAMPLE_RATE = 16000;
private static final int BIT_PER_SAMPLE = 16;
private static final long MAX_FILE_SIZE = 0xffffffffL - 36;
/**
* 默认参数生成wav头信息
*
* @param inputSize
* @return
*/
public static byte[] defaultValue(long inputSize) {
return bytes(inputSize, CHANNEL_NUMS, SAMPLE_RATE, BIT_PER_SAMPLE);
}
/**
* 校验文件是否符合wav最大要求
* 单独抽取一个方法是为了让文件尽可能写进去, 严进宽出, 一旦超过了可以写入, 但是不允许下次写入
*
* @param inputSize
*/
public static void checkFile(long inputSize) {
if (inputSize > MAX_FILE_SIZE) {
throw new RuntimeException("file data exceeded maximum limit");
}
}
/**
* 修复语音文件(临时工具)
*
* @param audioPath
*/
public static void fixAudio(String audioPath) {
// 判断当前任务是否有有效数据, 如果没有有效数据的话,
File file = new File(audioPath);
long length = file.length();
if (length > 44) {
// 读取wav头, 判断wav头是否正确
// 修改wav头
RandomAccessFile raf = null;
try {
raf = new RandomAccessFile(file, "rw");
raf.seek(40);
int dataSize = raf.readInt();
if (dataSize != length - 44) {
byte[] header = WavHeader.defaultValue(length - 44);
raf.seek(0);
raf.write(header);
}
} catch (Exception e) {
log.error("修复wav文件头信息出错 file:[{}]", file.getAbsolutePath());
} finally {
IoUtil.close(raf);
}
}
}
/**
* 生成wav头数据
*
* @param inputSize raw PCM data
* limit of file size for wave file: < 2^(2*4) - 36 bytes (~4GB)
* @param channelCount number of channels: 1 for mono, 2 for stereo, etc.
* @param sampleRate sample rate of PCM audio
* @param bitsPerSample bits per sample, i.e. 16 for PCM16
* @return
* @see soundfile.sapp.org/doc/WaveFormat
*/
public static byte[] bytes(long inputSize, int channelCount, int sampleRate, int bitsPerSample) {
List data = new ArrayList<>(44);
// WAVE RIFF header
// chunk id
writeStr(data, "RIFF");
// chunk size
writeUInt(data, 36 + inputSize);
// format
writeStr(data, "WAVE");
// SUB CHUNK 1 (FORMAT)
// subchunk 1 id
writeStr(data, "fmt ");
// subchunk 1 size
writeUInt(data, 16);
// audio format (1 = PCM)
writeUShort(data, (short) 1);
// number of channelCount
writeUShort(data, (short) channelCount);
// sample rate
writeUInt(data, sampleRate);
// byte rate
writeUInt(data, sampleRate * channelCount * bitsPerSample / 8);
// block align
writeUShort(data, (short) (channelCount * bitsPerSample / 8));
// bits per sample
writeUShort(data, (short) bitsPerSample);
// SUB CHUNK 2 (AUDIO DATA)
// subchunk 2 id
writeStr(data, "data");
// subchunk 2 size
writeUInt(data, inputSize);
byte[] byteArray = new byte[data.size()];
int index = 0;
for (Byte b : data) {
byteArray[index++] = b;
}
return byteArray;
}
/**
* Writes string in big endian form to an output stream
*
* @param output stream
* @param data string
*/
public static void writeStr(List output, String data) {
for (int i = 0; i < data.length(); i++) {
output.add((byte) data.charAt(i));
}
}
public static void writeUInt(List output, long data) {
output.add((byte) (data));
output.add((byte) (data >> 8));
output.add((byte) (data >> 16));
output.add((byte) (data >> 24));
}
public static void writeUShort(List output, short data) {
output.add((byte) (data));
output.add((byte) (data >> 8));
}
}