|
| 1 | +use digest::Digest; |
| 2 | +use sha1::Sha1; |
| 3 | + |
| 4 | +#[derive(Default)] |
| 5 | +pub struct TriSha1 { |
| 6 | + hasher: Sha1, |
| 7 | + offset: u64, |
| 8 | + file_size: u64, |
| 9 | + ranges: Vec<(u64, u64)>, |
| 10 | +} |
| 11 | + |
| 12 | +impl TriSha1 { |
| 13 | + pub fn new(file_size: u64) -> Self { |
| 14 | + let ranges = match file_size { |
| 15 | + // < 30M |
| 16 | + 0..31457280u64 => vec![(0, file_size)], |
| 17 | + // >= 30M |
| 18 | + _ => vec![ |
| 19 | + (0, 10485759u64), |
| 20 | + ((file_size >> 1) - 5242880, (file_size >> 1) + 5242879), |
| 21 | + (file_size - 10485760, file_size - 1), |
| 22 | + ], |
| 23 | + }; |
| 24 | + Self { |
| 25 | + file_size, |
| 26 | + ranges, |
| 27 | + ..Default::default() |
| 28 | + } |
| 29 | + } |
| 30 | + |
| 31 | + pub fn update(&mut self, data: &[u8]) { |
| 32 | + let start = self.offset; |
| 33 | + let end = start + data.len() as u64; |
| 34 | + for &(rs, re) in &self.ranges { |
| 35 | + let from = start.max(rs); |
| 36 | + let to = end.min(re + 1); |
| 37 | + if from < to { |
| 38 | + let slice = &data[(from - start) as usize..(to - start) as usize]; |
| 39 | + Digest::update(&mut self.hasher, slice); |
| 40 | + } |
| 41 | + } |
| 42 | + self.offset = end; |
| 43 | + } |
| 44 | + |
| 45 | + pub fn finalize(mut self) -> [u8; 20] { |
| 46 | + let size_bytes = self.file_size.to_ne_bytes(); |
| 47 | + Digest::update(&mut self.hasher, size_bytes); |
| 48 | + self.hasher.finalize().into() |
| 49 | + } |
| 50 | +} |
| 51 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utility::extensions::HexString;

    /// Produces `size` bytes by repeating the ASCII pattern `114514`,
    /// truncating the final repetition when `size` is not a multiple of 6.
    fn make_data(size: usize) -> Vec<u8> {
        b"114514".iter().copied().cycle().take(size).collect()
    }

    /// Hashes `size` bytes of pattern data in 8 KiB chunks and returns the
    /// digest as a hex string.
    fn inner_test(size: u64) -> String {
        let data = make_data(size as usize);
        let mut hasher = TriSha1::new(size);
        data.chunks(8192).for_each(|chunk| hasher.update(chunk));
        hasher.finalize().hex()
    }

    #[test]
    fn tri_sha1_small() {
        // 5 MiB: below the sampling threshold.
        let digest = inner_test(5 * 1024 * 1024);
        assert_eq!(digest, "8f155d8b6b1a9c196597b01d142f0d046ad44923");
    }

    #[test]
    fn tri_sha1_mid() {
        // 15 MiB: still below the sampling threshold.
        let digest = inner_test(15 * 1024 * 1024);
        assert_eq!(digest, "4748e8fdf867ea92d4d636e7312585ed93c80d0d");
    }

    #[test]
    fn tri_sha1_large() {
        // 50 MiB: above the threshold, so three windows are sampled.
        let digest = inner_test(50 * 1024 * 1024);
        assert_eq!(digest, "90d9a46f42f48dcee04f7390d4748e2a4c11d099");
    }
}
0 commit comments