七月 23rd, 2022 by lanxinxichen@126.com

参与对比工具:

  1. Concentus 纯java版本实现工具
  2. Jni + libopus1.1 jni+dll/so动态库实现工具
  3. Jni + libopus1.3 同上, 引入对比是因为看到 opus 1.3 版本的升级说明中提到加快了解码速度

1分半音频解码耗时如下:

测试方式 初始化耗时(ms) 转码耗时(ms)
java版opus实现 1 646
jni+libopus 1.1 2598 349
jni+libopus 1.3 未测试 未测试

 64分钟音频解码耗时如下:

测试方式 初始化耗时(ms) 转码耗时(ms)
java版opus实现 40 7233
jni+libopus 1.1 708 7848
jni+libopus 1.3 380 7356

在工程化的时候,使用jni的方式只会加载一次,所以我把初始化的耗时和转码耗时区分出来了。至于1分半音频的初始化耗时和64分钟音频的初始化耗时不一致,没有具体去分析。转码耗时其实差别也不算大。

对比两种方式解码出来的波形图差异(图一为libopus,图二为java-opus):

libopus方式解码
Concentus 方式解码

波形图整体差异不大,大概是不同语言的浮点运算差异导致的。

结论:基于项目的可移植性考虑, Concentus 一定是更胜一筹的,不用考虑在本地开发和服务器上so的差异(实际上 club.minnced:opus-java把几个常用平台的so、dll库封装,系统兼容也没有那么复杂)。

使用Concentus解码代码示例:

<dependency>
    <groupId>org.concentus</groupId>
    <artifactId>Concentus</artifactId>
    <version>1.0-RELEASE</version>
</dependency>
public static long opusToWav(String sourceFilePath, String outputFilePath) {
     log.info("调用封包opus转wav格式, 入参音频路径[{}], 出参路径[{}]", sourceFilePath, outputFilePath);
     FileInputStream fileInputStream = null;
     FileOutputStream fileOutputStream = null;
     try {
     // 估算音频长度, 并尝试写入wav头
     long sourceFileSize = FileUtil.size(new File(sourceFilePath));
     // 并非严格计算, 已知80字节按照16000解码出320的数据,故x8得到完整数据大小
     long decodedFileSize = sourceFileSize / SH_OPUS_AUDIO_PACKAGE_SIZE * SH_OPUS_AUDIO_FRAME_SIZE * 8;
     log.info("估算解码后wav文件大小 [{}]", decodedFileSize);
     byte[] wavHeader = WavHeader.bytes(decodedFileSize, SH_OPUS_AUDIO_CHANNEL, SH_OPUS_AUDIO_SAMPLE, 16);
     // 输入文件流
     fileInputStream = new FileInputStream(sourceFilePath);// 输出文件流
     new FileOutputStream(outputFilePath);
     // 先写入44字节wav头信息
     fileOutputStream.write(wavHeader);
     // 双猴默认参数
     OpusDecoder decoder = new OpusDecoder(SH_OPUS_AUDIO_SAMPLE, SH_OPUS_AUDIO_CHANNEL);
     byte[] bytes = new byte[SH_OPUS_AUDIO_PACKAGE_SIZE];
     int samples = 0;
     while (fileInputStream.read(bytes) != -1) {
     byte[] opusBytes = new byte[SH_OPUS_AUDIO_FRAME_SIZE];
     int length = bytes.length;
     // 封包前两个字节去掉, 生效的是opus的package, 封包格式按照约束的16k 单声道
     System.arraycopy(bytes, SH_OPUS_AUDIO_PACKAGE_SIZE - SH_OPUS_AUDIO_FRAME_SIZE, opusBytes, 0,
             length - (SH_OPUS_AUDIO_PACKAGE_SIZE - SH_OPUS_AUDIO_FRAME_SIZE));
     byte[] dataPacket = new byte[SH_OPUS_AUDIO_SAMPLE];
     int samplesDecoded =
             decoder.decode(opusBytes, 0, opusBytes.length, dataPacket, 0,
                     SH_OPUS_AUDIO_SAMPLE / 2, false);
     fileOutputStream.write(dataPacket, 0, samplesDecoded * 2);
     samples += samplesDecoded;
 }
     return samples * 1000 / SH_OPUS_AUDIO_SAMPLE;
 } catch (IOException | OpusException e) {
     log.error("转码失败, 入参音频路径[{}], 出参路径[{}]", sourceFilePath, outputFilePath, e);
     return 0;
 } finally {
     IoUtil.close(fileInputStream);
     IoUtil.close(fileOutputStream);
 }
package com.aicloud.listen.util;

import cn.hutool.core.io.IoUtil;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;


@Slf4j
public class WavHeader {

    private static final int CHANNEL_NUMS = 2;
    private static final int SAMPLE_RATE = 16000;
    private static final int BIT_PER_SAMPLE = 16;
    private static final long MAX_FILE_SIZE = 0xffffffffL - 36;

    /**
     * 默认参数生成wav头信息
     *
     * @param inputSize
     * @return
     */
    public static byte[] defaultValue(long inputSize) {
        return bytes(inputSize, CHANNEL_NUMS, SAMPLE_RATE, BIT_PER_SAMPLE);
    }

    /**
     * 校验文件是否符合wav最大要求
     * 单独抽取一个方法是为了让文件尽可能写进去, 严进宽出, 一旦超过了可以写入, 但是不允许下次写入
     *
     * @param inputSize
     */
    public static void checkFile(long inputSize) {
        if (inputSize > MAX_FILE_SIZE) {
            throw new RuntimeException("file data exceeded maximum limit");
        }
    }

    /**
     * 修复语音文件(临时工具)
     *
     * @param audioPath
     */
    public static void fixAudio(String audioPath) {
        // 判断当前任务是否有有效数据, 如果没有有效数据的话,
        File file = new File(audioPath);
        long length = file.length();
        if (length > 44) {
            // 读取wav头, 判断wav头是否正确
            // 修改wav头
            RandomAccessFile raf = null;
            try {
                raf = new RandomAccessFile(file, "rw");
                raf.seek(40);
                int dataSize = raf.readInt();
                if (dataSize != length - 44) {
                    byte[] header = WavHeader.defaultValue(length - 44);
                    raf.seek(0);
                    raf.write(header);
                }
            } catch (Exception e) {
                log.error("修复wav文件头信息出错 file:[{}]", file.getAbsolutePath());
            } finally {
                IoUtil.close(raf);
            }
        }
    }

    /**
     * 生成wav头数据
     *
     * @param inputSize     raw PCM data
     *                      limit of file size for wave file: < 2^(2*4) - 36 bytes (~4GB)
     * @param channelCount  number of channels: 1 for mono, 2 for stereo, etc.
     * @param sampleRate    sample rate of PCM audio
     * @param bitsPerSample bits per sample, i.e. 16 for PCM16
     * @return
     * @see soundfile.sapp.org/doc/WaveFormat
     */
    public static byte[] bytes(long inputSize, int channelCount, int sampleRate, int bitsPerSample) {
        List data = new ArrayList<>(44);
        // WAVE RIFF header
        // chunk id
        writeStr(data, "RIFF");
        // chunk size
        writeUInt(data, 36 + inputSize);
        // format
        writeStr(data, "WAVE");

        // SUB CHUNK 1 (FORMAT)
        // subchunk 1 id
        writeStr(data, "fmt ");
        // subchunk 1 size
        writeUInt(data, 16);
        // audio format (1 = PCM)
        writeUShort(data, (short) 1);
        // number of channelCount
        writeUShort(data, (short) channelCount);
        // sample rate
        writeUInt(data, sampleRate);
        // byte rate
        writeUInt(data, sampleRate * channelCount * bitsPerSample / 8);
        // block align
        writeUShort(data, (short) (channelCount * bitsPerSample / 8));
        // bits per sample
        writeUShort(data, (short) bitsPerSample);

        // SUB CHUNK 2 (AUDIO DATA)
        // subchunk 2 id
        writeStr(data, "data");
        // subchunk 2 size
        writeUInt(data, inputSize);
        byte[] byteArray = new byte[data.size()];
        int index = 0;
        for (Byte b : data) {
            byteArray[index++] = b;
        }
        return byteArray;
    }

    /**
     * Writes string in big endian form to an output stream
     *
     * @param output stream
     * @param data   string
     */
    public static void writeStr(List output, String data) {
        for (int i = 0; i < data.length(); i++) {
            output.add((byte) data.charAt(i));
        }
    }

    public static void writeUInt(List output, long data) {
        output.add((byte) (data));
        output.add((byte) (data >> 8));
        output.add((byte) (data >> 16));
        output.add((byte) (data >> 24));
    }

    public static void writeUShort(List output, short data) {
        output.add((byte) (data));
        output.add((byte) (data >> 8));
    }
 }

Posted in 未分类 Tagged with: ,