fix: reduce mp3 false-positive

This commit is contained in:
鲁树人
2025-09-08 20:29:23 +09:00
parent 479a27495d
commit 9ba4eed1ea
9 changed files with 146 additions and 44 deletions

Binary file not shown.

Binary file not shown.

9
um_audio/src/aac.rs Normal file
View File

@@ -0,0 +1,9 @@
/// Number of leading bytes (4096) the detector wants to have seen before it
/// stops looking for a sync frame.
// NOTE(review): the only use visible here is the "need more header" length
// check in `detect_audio_type`; confirm there are no other consumers.
pub const SYNC_FRAME_TEST_SIZE: usize = 4096;
/// Reports whether the upper 16 bits of `magic` look like an AAC frame sync.
///
/// `magic` is the first four bytes of the candidate frame read as a
/// big-endian `u32`. The check passes when the top 12 bits are all ones and
/// the two bits at positions 18..=17 are zero; bits 19 and 16 are ignored.
// NOTE(review): this matches the ADTS header layout (syncword, ID, layer,
// protection_absent) — presumably that is the intended target.
pub fn is_aac(magic: u32) -> bool {
    let leading_half = magic >> 16;

    // Bits 15..=4 of the leading half-word: the 12-bit sync word.
    let sync_word = leading_half >> 4;
    // Bits 2..=1 of the leading half-word: must be zero.
    let layer = (leading_half >> 1) & 0b11;

    sync_word == 0xFFF && layer == 0
}

View File

@@ -1,10 +1,11 @@
mod metadata;
mod aac;
mod audio_type;
mod sync_frame;
mod metadata;
mod mp3;
use crate::sync_frame::SYNC_FRAME_TEST_SIZE;
use crate::aac::SYNC_FRAME_TEST_SIZE;
use aac::is_aac;
pub use audio_type::{AudioError, AudioType};
use sync_frame::{is_aac, is_mp3};
const MAGIC_FLAC: [u8; 4] = *b"fLaC";
const MAGIC_OGG: [u8; 4] = *b"OggS";
@@ -35,7 +36,7 @@ pub fn detect_audio_type(buffer: &[u8]) -> Result<AudioType, AudioError> {
let magic = u32::from_be_bytes(magic);
if is_aac(magic) {
return Ok(AudioType::AAC);
} else if is_mp3(magic) {
} else if mp3::is_mp3(buffer) {
return Ok(AudioType::MP3);
}
@@ -61,17 +62,7 @@ pub fn detect_audio_type(buffer: &[u8]) -> Result<AudioType, AudioError> {
};
}
// brute force test for MP3 / AAC
for magic_window in buffer.windows(4).take(SYNC_FRAME_TEST_SIZE) {
let magic = u32::from_be_bytes(magic_window.try_into().unwrap());
if is_mp3(magic) {
return Ok(AudioType::MP3);
} else if is_aac(magic) {
return Ok(AudioType::AAC);
}
}
// Ask for more data to test for MP3 / AAC
// Ask for more data to test for MP3
if buffer.len() < SYNC_FRAME_TEST_SIZE {
return Err(AudioError::NeedMoreHeader(offset + SYNC_FRAME_TEST_SIZE));
}
@@ -85,7 +76,7 @@ mod tests {
#[test]
fn test_mp3() {
let mp3_data = include_bytes!("__fixtures__/mp3_with_id3v2.bin");
let mp3_data = include_bytes!("__fixtures__/ffmpeg_silent.mp3");
let result = detect_audio_type(mp3_data).expect("failed to parse mp3");
assert_eq!(result, AudioType::MP3);
}
@@ -111,4 +102,11 @@ mod tests {
let result = detect_audio_type(&mp3_data).expect("failed to parse mp3");
assert_eq!(result, AudioType::Unknown);
}
#[test]
fn test_mp3_invalid_2() {
let mp3_data = include_bytes!("__fixtures__/junk.bin");
let result = detect_audio_type(mp3_data).expect("failed to parse mp3");
assert_eq!(result, AudioType::Unknown);
}
}

122
um_audio/src/mp3.rs Normal file
View File

@@ -0,0 +1,122 @@
/// Reports whether `buf` looks like MP3 data.
///
/// The buffer is accepted when `scan_for_mp3` finds a chain of at least three
/// consecutive frames; a single matching header is not enough.
pub fn is_mp3(buf: &[u8]) -> bool {
    // Minimum number of chained frames required to accept the buffer.
    const MIN_CHAINED_FRAMES: usize = 3;

    let chained_frames = scan_for_mp3(buf);
    chained_frames >= MIN_CHAINED_FRAMES
}
/// Returns the number of frames in the longest chain of back-to-back MPEG
/// audio frames found in `buf`.
///
/// Every 4-byte window of `buf` is parsed as a candidate frame header. A valid
/// header at offset `p` with frame length `len` links to the header at
/// `p + len`; the result is the number of headers in the longest such chain.
/// A header whose frame runs past the end of the buffer still counts as one
/// frame. Returns `0` when no window parses as a header, which includes
/// buffers shorter than 4 bytes.
pub fn scan_for_mp3(buf: &[u8]) -> usize {
    // chain_len[p] = frames in the chain starting at offset p (0 = no header at p).
    let mut chain_len = vec![0usize; buf.len()];
    let mut longest = 0;

    // Walk the candidate offsets from back to front: a frame always links
    // forward, so the chain length of its successor is already known. This
    // visits each offset once instead of re-walking every chain from every
    // start. `windows(4)` also yields the final 4 bytes of the buffer, so a
    // header sitting at the very end is not missed.
    for (pos, window) in buf.windows(4).enumerate().rev() {
        let header = u32::from_be_bytes([window[0], window[1], window[2], window[3]]);
        if let Some(frame_size) = parse_mp3_header(header) {
            let next = pos + frame_size;
            // A successor offset outside the buffer simply ends the chain.
            let following = chain_len.get(next).copied().unwrap_or(0);
            chain_len[pos] = following + 1;
            longest = longest.max(chain_len[pos]);
        }
    }

    longest
}
/// Parses `h` as a big-endian MPEG audio frame header and returns the length
/// of that frame in bytes.
///
/// Returns `None` when the 11-bit frame sync is not all ones, when the
/// version or layer field holds its reserved value, when the bitrate index is
/// `0` or `15`, when the sampling-rate index is `3`, or when either table
/// lookup fails.
fn parse_mp3_header(h: u32) -> Option<usize> {
    // Bits 31..=21: frame sync, must be all ones.
    let sync = (h >> 21) & 0x7FF;
    if sync != 0x7FF {
        return None;
    }

    // Bits 20..=19: MPEG version (0b11 = 1, 0b10 = 2, 0b00 = 2.5).
    let version_id = (h >> 19) & 0b11;
    if version_id == 0b01 {
        return None; // reserved
    }

    // Bits 18..=17: layer (0b11 = I, 0b10 = II, 0b01 = III).
    let layer = (h >> 17) & 0b11;
    if layer == 0b00 {
        return None; // reserved
    }

    // Bits 15..=12: bitrate index; 0 and 15 are rejected here.
    let bitrate_idx = (h >> 12) & 0b1111;
    if bitrate_idx == 0b0000 || bitrate_idx == 0b1111 {
        return None;
    }

    // Bits 11..=10: sampling-rate index; 3 is rejected.
    let sampling_idx = (h >> 10) & 0b11;
    if sampling_idx == 0b11 {
        return None;
    }

    // Bit 9: padding flag (one extra slot in the frame).
    let padding = (h >> 9) & 0b1;

    // Lookup tables
    let bitrate = bitrate_kbps(version_id, layer, bitrate_idx)? * 1000;
    let sample_rate = sample_rate_hz(version_id, sampling_idx)?;

    // Frame length = samples_per_frame / 8 * bitrate / sample_rate (+ padding).
    let frame_len = match (version_id, layer) {
        // Layer I: 384 samples per frame, measured in 4-byte slots.
        (_, 0b11) => ((12 * bitrate / sample_rate) + padding) * 4,
        // Layer II: 1152 samples per frame in every MPEG version.
        (_, 0b10) => (144 * bitrate / sample_rate) + padding,
        // Layer III, MPEG1: 1152 samples per frame.
        (0b11, _) => (144 * bitrate / sample_rate) + padding,
        // Layer III, MPEG2/2.5: 576 samples per frame.
        (_, _) => (72 * bitrate / sample_rate) + padding,
    };

    Some(frame_len as usize)
}
/// Looks up the bitrate in kbit/s for a frame header's bitrate index.
///
/// `version` and `layer` are the raw 2-bit header fields (`version`: `0b11` =
/// MPEG1, `0b10` = MPEG2, `0b00` = MPEG2.5; `layer`: `0b11` = I, `0b10` = II,
/// `0b01` = III). `idx` is the raw 4-bit bitrate index.
///
/// Returns `None` for the reserved version (`0b01`), the reserved layer
/// (`0b00`), any field value outside its 2-bit range, and any `idx` outside
/// `1..=14`.
fn bitrate_kbps(version: u32, layer: u32, idx: u32) -> Option<u32> {
    const MPEG1_LAYER_1: [u32; 14] = [
        32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448,
    ];
    const MPEG1_LAYER_2: [u32; 14] = [
        32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,
    ];
    const MPEG1_LAYER_3: [u32; 14] = [
        32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320,
    ];
    const MPEG2_LAYER_1: [u32; 14] = [
        32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256,
    ];
    const MPEG2_LAYER_2_3: [u32; 14] = [8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];

    let table = match (version, layer) {
        // MPEG Version 1
        (0b11, 0b11) => &MPEG1_LAYER_1,
        (0b11, 0b10) => &MPEG1_LAYER_2,
        (0b11, 0b01) => &MPEG1_LAYER_3,
        // MPEG Version 2 or 2.5
        (0b10 | 0b00, 0b11) => &MPEG2_LAYER_1,
        (0b10 | 0b00, 0b10 | 0b01) => &MPEG2_LAYER_2_3,
        // Reserved version/layer, or a value outside the 2-bit field range.
        _ => return None,
    };

    // Index 0 has no table entry; the tables start at index 1. Index 15 and
    // anything larger falls off the end of the table and yields `None`.
    let slot = idx.checked_sub(1)? as usize;
    table.get(slot).copied()
}
/// Looks up the sampling rate in Hz for a frame header's sampling-rate index.
///
/// `version` is the raw 2-bit version field (`0b11` = MPEG1, `0b10` = MPEG2,
/// `0b00` = MPEG2.5) and `idx` is the raw 2-bit sampling-rate index.
///
/// Returns `None` for any other `version` value and for any `idx` outside
/// `0..=2`.
fn sample_rate_hz(version: u32, idx: u32) -> Option<u32> {
    let table: [u32; 3] = match version {
        // MPEG Version 1
        0b11 => [44100, 48000, 32000],
        // MPEG Version 2
        0b10 => [22050, 24000, 16000],
        // MPEG Version 2.5
        0b00 => [11025, 12000, 8000],
        _ => return None,
    };

    // Checked lookup: an out-of-range index is reported as `None`, not a panic.
    table.get(idx as usize).copied()
}

View File

@@ -1,27 +0,0 @@
/// Size (255) of the region examined when testing for a sync frame.
// NOTE(review): in the visible caller this bounds both the number of 4-byte
// windows that are brute-force tested and the minimum buffer length that is
// requested before giving up; confirm against the full caller.
pub const SYNC_FRAME_TEST_SIZE: usize = 0xff;
/// Reports whether `magic` (four header bytes read as a big-endian `u32`)
/// passes the single-header MP3 check.
///
/// The check requires all of:
/// - bits 31..=20 all ones (the 11-bit sync word plus the high version bit),
/// - layer field (bits 18..=17) equal to `0b01`,
/// - bitrate index (bits 15..=12) not `0b1111`,
/// - sampling-rate index (bits 11..=10) not `0b11`.
///
/// Bits 19 and 16 are ignored.
pub fn is_mp3(magic: u32) -> bool {
    let sync_and_version_msb = magic >> 20;
    let layer = (magic >> 17) & 0b11;
    let bitrate_index = (magic >> 12) & 0b1111;
    let sampling_rate_index = (magic >> 10) & 0b11;

    sync_and_version_msb == 0xFFF
        && layer == 0b01
        && bitrate_index != 0b1111
        && sampling_rate_index != 0b11
}
/// Reports whether the leading 16 bits of `magic` pass the AAC frame-sync
/// check: the first 12 bits set to 1 and the two bits at positions 18..=17
/// cleared. Bits 19 and 16 are not examined.
pub fn is_aac(magic: u32) -> bool {
    // Only the upper half-word takes part in the comparison.
    let header = (magic >> 16) as u16;

    // 0xFFF6 keeps the 12 sync bits and the 2-bit field after the ID bit;
    // 0xFFF0 is "sync all ones, that field zero".
    (header & 0xFFF6) == 0xFFF0
}