参与对比工具:
1分半音频解码耗时如下:
测试方式 | 初始化耗时(ms) | 转码耗时(ms) |
java版opus实现 | 1 | 646 |
jni+libopus 1.1 | 2598 | 349 |
jni+libopus 1.3 | 未测试 | 未测试 |
64分钟音频解码耗时如下:
测试方式 | 初始化耗时(ms) | 转码耗时(ms) |
java版opus实现 | 40 | 7233 |
jni+libopus 1.1 | 708 | 7848 |
jni+libopus 1.3 | 380 | 7356 |
在工程化的时候,使用jni的方式只会加载一次,所以我把初始化耗时和转码耗时区分开来统计。至于1分半音频的初始化耗时和64分钟音频的初始化耗时不一致,没有具体去分析。转码耗时其实差别也不算大。
对比两种方式解码出来的波形图差异(图一为libopus,图二为java-opus):
波形图整体差异不大,大概是不同语言的浮点运算差异导致的。
结论:基于项目的可移植性考虑, Concentus 一定是更胜一筹的,不用考虑在本地开发和服务器上so的差异(实际上 club.minnced:opus-java把几个常用平台的so、dll库封装,系统兼容也没有那么复杂)。
使用Concentus解码代码示例:
<dependency>
    <groupId>org.concentus</groupId>
    <artifactId>Concentus</artifactId>
    <version>1.0-RELEASE</version>
</dependency>
public static long opusToWav(String sourceFilePath, String outputFilePath) { log.info("调用封包opus转wav格式, 入参音频路径[{}], 出参路径[{}]", sourceFilePath, outputFilePath); FileInputStream fileInputStream = null; FileOutputStream fileOutputStream = null; try { // 估算音频长度, 并尝试写入wav头 long sourceFileSize = FileUtil.size(new File(sourceFilePath)); // 并非严格计算, 已知80字节按照16000解码出320的数据,故x8得到完整数据大小 long decodedFileSize = sourceFileSize / SH_OPUS_AUDIO_PACKAGE_SIZE * SH_OPUS_AUDIO_FRAME_SIZE * 8; log.info("估算解码后wav文件大小 [{}]", decodedFileSize); byte[] wavHeader = WavHeader.bytes(decodedFileSize, SH_OPUS_AUDIO_CHANNEL, SH_OPUS_AUDIO_SAMPLE, 16); // 输入文件流 fileInputStream = new FileInputStream(sourceFilePath);// 输出文件流 new FileOutputStream(outputFilePath); // 先写入44字节wav头信息 fileOutputStream.write(wavHeader); // 双猴默认参数 OpusDecoder decoder = new OpusDecoder(SH_OPUS_AUDIO_SAMPLE, SH_OPUS_AUDIO_CHANNEL); byte[] bytes = new byte[SH_OPUS_AUDIO_PACKAGE_SIZE]; int samples = 0; while (fileInputStream.read(bytes) != -1) { byte[] opusBytes = new byte[SH_OPUS_AUDIO_FRAME_SIZE]; int length = bytes.length; // 封包前两个字节去掉, 生效的是opus的package, 封包格式按照约束的16k 单声道 System.arraycopy(bytes, SH_OPUS_AUDIO_PACKAGE_SIZE - SH_OPUS_AUDIO_FRAME_SIZE, opusBytes, 0, length - (SH_OPUS_AUDIO_PACKAGE_SIZE - SH_OPUS_AUDIO_FRAME_SIZE)); byte[] dataPacket = new byte[SH_OPUS_AUDIO_SAMPLE]; int samplesDecoded = decoder.decode(opusBytes, 0, opusBytes.length, dataPacket, 0, SH_OPUS_AUDIO_SAMPLE / 2, false); fileOutputStream.write(dataPacket, 0, samplesDecoded * 2); samples += samplesDecoded; } return samples * 1000 / SH_OPUS_AUDIO_SAMPLE; } catch (IOException | OpusException e) { log.error("转码失败, 入参音频路径[{}], 出参路径[{}]", sourceFilePath, outputFilePath, e); return 0; } finally { IoUtil.close(fileInputStream); IoUtil.close(fileOutputStream); }
package com.aicloud.listen.util; import cn.hutool.core.io.IoUtil; import lombok.extern.slf4j.Slf4j; import java.io.File; import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.List; @Slf4j public class WavHeader { private static final int CHANNEL_NUMS = 2; private static final int SAMPLE_RATE = 16000; private static final int BIT_PER_SAMPLE = 16; private static final long MAX_FILE_SIZE = 0xffffffffL - 36; /** * 默认参数生成wav头信息 * * @param inputSize * @return */ public static byte[] defaultValue(long inputSize) { return bytes(inputSize, CHANNEL_NUMS, SAMPLE_RATE, BIT_PER_SAMPLE); } /** * 校验文件是否符合wav最大要求 * 单独抽取一个方法是为了让文件尽可能写进去, 严进宽出, 一旦超过了可以写入, 但是不允许下次写入 * * @param inputSize */ public static void checkFile(long inputSize) { if (inputSize > MAX_FILE_SIZE) { throw new RuntimeException("file data exceeded maximum limit"); } } /** * 修复语音文件(临时工具) * * @param audioPath */ public static void fixAudio(String audioPath) { // 判断当前任务是否有有效数据, 如果没有有效数据的话, File file = new File(audioPath); long length = file.length(); if (length > 44) { // 读取wav头, 判断wav头是否正确 // 修改wav头 RandomAccessFile raf = null; try { raf = new RandomAccessFile(file, "rw"); raf.seek(40); int dataSize = raf.readInt(); if (dataSize != length - 44) { byte[] header = WavHeader.defaultValue(length - 44); raf.seek(0); raf.write(header); } } catch (Exception e) { log.error("修复wav文件头信息出错 file:[{}]", file.getAbsolutePath()); } finally { IoUtil.close(raf); } } } /** * 生成wav头数据 * * @param inputSize raw PCM data * limit of file size for wave file: < 2^(2*4) - 36 bytes (~4GB) * @param channelCount number of channels: 1 for mono, 2 for stereo, etc. * @param sampleRate sample rate of PCM audio * @param bitsPerSample bits per sample, i.e. 
16 for PCM16 * @return * @see soundfile.sapp.org/doc/WaveFormat */ public static byte[] bytes(long inputSize, int channelCount, int sampleRate, int bitsPerSample) { Listdata = new ArrayList<>(44); // WAVE RIFF header // chunk id writeStr(data, "RIFF"); // chunk size writeUInt(data, 36 + inputSize); // format writeStr(data, "WAVE"); // SUB CHUNK 1 (FORMAT) // subchunk 1 id writeStr(data, "fmt "); // subchunk 1 size writeUInt(data, 16); // audio format (1 = PCM) writeUShort(data, (short) 1); // number of channelCount writeUShort(data, (short) channelCount); // sample rate writeUInt(data, sampleRate); // byte rate writeUInt(data, sampleRate * channelCount * bitsPerSample / 8); // block align writeUShort(data, (short) (channelCount * bitsPerSample / 8)); // bits per sample writeUShort(data, (short) bitsPerSample); // SUB CHUNK 2 (AUDIO DATA) // subchunk 2 id writeStr(data, "data"); // subchunk 2 size writeUInt(data, inputSize); byte[] byteArray = new byte[data.size()]; int index = 0; for (Byte b : data) { byteArray[index++] = b; } return byteArray; } /** * Writes string in big endian form to an output stream * * @param output stream * @param data string */ public static void writeStr(List output, String data) { for (int i = 0; i < data.length(); i++) { output.add((byte) data.charAt(i)); } } public static void writeUInt(List output, long data) { output.add((byte) (data)); output.add((byte) (data >> 8)); output.add((byte) (data >> 16)); output.add((byte) (data >> 24)); } public static void writeUShort(List output, short data) { output.add((byte) (data)); output.add((byte) (data >> 8)); } }