From a008de06269901d9a1ecd7405caa10a175d14078 Mon Sep 17 00:00:00 2001 From: JohnnyFFM Date: Thu, 11 Apr 2019 09:39:01 +0200 Subject: [PATCH] more rust (#59) --- .gitignore | 2 + src/cpu_worker.rs | 82 +++----- src/main.rs | 4 +- src/miner.rs | 11 +- src/poc_hashing.rs | 43 ++++ src/pocmath.rs | 20 -- src/shabal256.rs | 512 +++++++++++++++++++++++++++++++++++++++++++++ src/shabals.rs | 75 ------- 8 files changed, 595 insertions(+), 154 deletions(-) create mode 100644 src/poc_hashing.rs delete mode 100644 src/pocmath.rs create mode 100644 src/shabal256.rs delete mode 100644 src/shabals.rs diff --git a/.gitignore b/.gitignore index 716eb97..72430ce 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ *.o /log /.vscode +**/*.exe +**/*.bat diff --git a/src/cpu_worker.rs b/src/cpu_worker.rs index 5b81462..15d43ef 100644 --- a/src/cpu_worker.rs +++ b/src/cpu_worker.rs @@ -1,21 +1,13 @@ use crate::miner::{Buffer, NonceData}; +use crate::poc_hashing::find_best_deadline_rust; use crate::reader::ReadReply; use crossbeam_channel::{Receiver, Sender}; use futures::sync::mpsc; use futures::{Future, Sink}; +#[cfg(any(feature = "simd", feature = "neon"))] use libc::{c_void, uint64_t}; use std::u64; -extern "C" { - pub fn find_best_deadline_sph( - scoops: *mut c_void, - nonce_count: uint64_t, - gensig: *const c_void, - best_deadline: *mut uint64_t, - best_offset: *mut uint64_t, - ) -> (); -} - cfg_if! { if #[cfg(feature = "simd")] { extern "C" { @@ -125,8 +117,10 @@ pub fn hash( if read_reply.info.len == 1 && read_reply.info.gpu_signal > 0 { return; } - + + #[allow(unused_assignments)] let mut deadline: u64 = u64::MAX; + #[allow(unused_assignments)] let mut offset: u64 = 0; let bs = buffer.get_buffer_for_writing(); @@ -167,13 +161,13 @@ pub fn hash( &mut offset, ); } else { - find_best_deadline_sph( - bs.as_ptr() as *mut c_void, + let result = find_best_deadline_rust( + &bs, (read_reply.info.len as u64) / 64, - read_reply.info.gensig.as_ptr() as *const c_void, - &mut deadline, - &mut offset, + &*read_reply.info.gensig, ); + deadline = result.0; + offset = result.1; } } #[cfg(feature = "neon")] @@ -191,24 +185,25 @@ pub fn hash( &mut offset, ); } else { - find_best_deadline_sph( - bs.as_ptr() as *mut c_void, + let result = find_best_deadline_rust( + &bs, (read_reply.info.len as u64) / 64, - read_reply.info.gensig.as_ptr() as *const c_void, - &mut deadline, - &mut offset, + &*read_reply.info.gensig, ); + deadline = result.0; + offset = result.1; } } + #[cfg(not(any(feature = "simd", feature = "neon")))] - unsafe { - find_best_deadline_sph( - bs.as_ptr() as *mut c_void, + { + let result = find_best_deadline_rust( + &bs, (read_reply.info.len as u64) / 64, - read_reply.info.gensig.as_ptr() as *const c_void, - &mut deadline, - &mut offset, + &*read_reply.info.gensig, ); + deadline = result.0; + offset = result.1; } tx_nonce_data @@ -231,20 +226,10 @@ pub fn hash( #[cfg(test)] mod tests { + use crate::poc_hashing::find_best_deadline_rust; use hex; - use libc::{c_void, uint64_t}; use std::u64; - extern "C" { - pub fn find_best_deadline_sph( - scoops: *mut c_void, - nonce_count: uint64_t, - gensig: *const c_void, - best_deadline: *mut uint64_t, - best_offset: *mut uint64_t, - ) -> (); - } - cfg_if! { if #[cfg(feature = "simd")] { extern "C" { @@ -304,11 +289,14 @@ mod tests { #[test] fn test_deadline_hashing() { - let mut deadline: u64 = u64::MAX; - let mut offset: u64 = 0; + let mut deadline: u64; let gensig = hex::decode("4a6f686e6e7946464d206861742064656e206772f6df74656e2050656e697321") .unwrap(); + + let mut gensig_array = [0u8; 32]; + gensig_array.copy_from_slice(&gensig[..]); + let winner: [u8; 64] = [0; 64]; let loser: [u8; 64] = [5; 64]; let mut data: [u8; 64 * 32] = [5; 64 * 32]; @@ -316,18 +304,10 @@ mod tests { for i in 0..32 { data[i * 64..i * 64 + 64].clone_from_slice(&winner); - unsafe { - find_best_deadline_sph( - data.as_ptr() as *mut c_void, - (i + 1) as u64, - gensig.as_ptr() as *const c_void, - &mut deadline, - &mut offset, - ); - } + let result = find_best_deadline_rust(&data, (i + 1) as u64, &gensig_array); + deadline = result.0; + assert_eq!(3084580316385335914u64, deadline); - deadline = u64::MAX; - offset = 0; data[i * 64..i * 64 + 64].clone_from_slice(&loser); } } diff --git a/src/main.rs b/src/main.rs index c9ecb61..e2e4dad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,10 +13,10 @@ mod cpu_worker; mod logger; mod miner; mod plot; -mod pocmath; +mod poc_hashing; mod reader; mod requests; -mod shabals; +mod shabal256; mod utils; #[cfg(feature = "opencl")] diff --git a/src/miner.rs b/src/miner.rs index d42d803..819f60c 100644 --- a/src/miner.rs +++ b/src/miner.rs @@ -9,7 +9,7 @@ use crate::ocl::GpuBuffer; #[cfg(feature = "opencl")] use crate::ocl::GpuContext; use crate::plot::{Plot, SCOOP_SIZE}; -use crate::pocmath; +use crate::poc_hashing; use crate::reader::Reader; use crate::requests::RequestHandler; use crate::utils::{get_device_id, new_thread_pool}; @@ -132,9 +132,7 @@ fn scan_plots( if let Ok(p) = Plot::new(file, use_direct_io, dummy) { let drive_id = get_device_id(&file.to_str().unwrap().to_string()); - let plots = drive_id_to_plots - .entry(drive_id) - .or_insert(Vec::new()); + let plots = drive_id_to_plots.entry(drive_id).or_insert(Vec::new()); local_capacity += p.meta.nonces as u64; plots.push(Mutex::new(p)); @@ -454,10 +452,11 @@ impl Miner { state.server_target_deadline = mining_info.target_deadline; let gensig = - pocmath::decode_gensig(&mining_info.generation_signature); + poc_hashing::decode_gensig(&mining_info.generation_signature); state.generation_signature = mining_info.generation_signature; - let scoop = pocmath::calculate_scoop(mining_info.height, &gensig); + let scoop = + poc_hashing::calculate_scoop(mining_info.height, &gensig); info!( "{: <80}", format!( diff --git a/src/poc_hashing.rs b/src/poc_hashing.rs new file mode 100644 index 0000000..f84c5ad --- /dev/null +++ b/src/poc_hashing.rs @@ -0,0 +1,43 @@ +use crate::shabal256::{shabal256_deadline_fast, shabal256_hash_fast}; +use hex; +use std::mem::transmute; +use std::u64; + +const SCOOP_SIZE: usize = 64; + +pub fn decode_gensig(gensig: &str) -> [u8; 32] { + let mut gensig_bytes = [0; 32]; + gensig_bytes[..].clone_from_slice(&hex::decode(gensig).unwrap()); + gensig_bytes +} + +pub fn calculate_scoop(height: u64, gensig: &[u8; 32]) -> u32 { + let mut data: [u8; 64] = [0; 64]; + let height_bytes: [u8; 8] = unsafe { transmute(height.to_be()) }; + + data[..32].clone_from_slice(gensig); + data[32..40].clone_from_slice(&height_bytes); + data[40] = 0x80; + let data = unsafe { std::mem::transmute::<&[u8; 64], &[u32; 16]>(&data) }; + + let new_gensig = &shabal256_hash_fast(&[], &data); + (u32::from(new_gensig[30] & 0x0F) << 8) | u32::from(new_gensig[31]) +} + +pub fn find_best_deadline_rust( + data: &[u8], + number_of_nonces: u64, + gensig: &[u8; 32], +) -> (u64, u64) { + let mut best_deadline = u64::MAX; + let mut best_offset = 0; + for i in 0..number_of_nonces as usize { + let result = + shabal256_deadline_fast(&data[i * SCOOP_SIZE..i * SCOOP_SIZE + SCOOP_SIZE], &gensig); + if result < best_deadline { + best_deadline = result; + best_offset = i; + } + } + (best_deadline, best_offset as u64) +} diff --git a/src/pocmath.rs b/src/pocmath.rs deleted file mode 100644 index ada433a..0000000 --- a/src/pocmath.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::shabals; -use hex; -use std::mem::transmute; - -pub fn decode_gensig(gensig: &str) -> [u8; 32] { - let mut gensig_bytes = [0; 32]; - gensig_bytes[..].clone_from_slice(&hex::decode(gensig).unwrap()); - gensig_bytes -} - -pub fn calculate_scoop(height: u64, gensig: &[u8; 32]) -> u32 { - let mut data: [u8; 40] = [0; 40]; - let height_bytes: [u8; 8] = unsafe { transmute(height.to_be()) }; - - data[32..].clone_from_slice(&height_bytes); - data[..32].clone_from_slice(gensig); - - let new_gensig = shabals::shabal256(&data); - (u32::from(new_gensig[30] & 0x0F) << 8) | u32::from(new_gensig[31]) -} diff --git a/src/shabal256.rs b/src/shabal256.rs new file mode 100644 index 0000000..70cbdaf --- /dev/null +++ b/src/shabal256.rs @@ -0,0 +1,512 @@ +use std::slice::from_raw_parts; + +const A_INIT: [u32; 12] = [ + 0x52F84552, 0xE54B7999, 0x2D8EE3EC, 0xB9645191, 0xE0078B86, 0xBB7C44C9, 0xD2B5C1CA, 0xB0D2EB8C, + 0x14CE5A45, 0x22AF50DC, 0xEFFDBC6B, 0xEB21B74A, +]; + +const B_INIT: [u32; 16] = [ + 0xB555C6EE, 0x3E710596, 0xA72A652F, 0x9301515F, 0xDA28C1FA, 0x696FD868, 0x9CB6BF72, 0x0AFE4002, + 0xA6E03615, 0x5138C1D4, 0xBE216306, 0xB38B8890, 0x3EA8B96B, 0x3299ACE4, 0x30924DD4, 0x55CB34A5, +]; + +const C_INIT: [u32; 16] = [ + 0xB405F031, 0xC4233EBA, 0xB3733979, 0xC0DD9D55, 0xC51C28AE, 0xA327B8E1, 0x56C56167, 0xED614433, + 0x88B59D60, 0x60E2CEBA, 0x758B4B8B, 0x83E82A7F, 0xBC968828, 0xE6E00BF7, 0xBA839E55, 0x9B491C60, +]; + +pub fn shabal256_deadline_fast(data: &[u8], gensig: &[u8; 32]) -> u64 { + let mut a = A_INIT; + let mut b = B_INIT; + let mut c = C_INIT; + let mut w_high = 0u32; + let mut w_low = 1u32; + let data_ptr = data.as_ptr() as *const u32; + let data = unsafe { from_raw_parts(data_ptr, data.len() / 4) }; + let gensig = unsafe { std::mem::transmute::<&[u8; 32], &[u32; 8]>(&gensig) }; + let mut term = [0u32; 8]; + term[0] = 0x80; + + input_block_add_dl(&mut b, &gensig[..], &data[..8]); + xor_w(&mut a, w_low, w_high); + apply_p_dl(&mut a, &mut b, &c, gensig, &data[..8]); + input_block_sub_dl(&mut c, gensig, &data[..8]); + swap_bc(&mut b, &mut c); + incr_w(&mut w_low, &mut w_high); + + input_block_add_dl(&mut b, &data[8..], &term); + xor_w(&mut a, w_low, w_high); + apply_p_dl(&mut a, &mut b, &c, &data[8..], &term); + for _ in 0..3 { + swap_bc(&mut b, &mut c); + xor_w(&mut a, w_low, w_high); + apply_p_dl(&mut a, &mut b, &c, &data[8..], &term); + } + let b = unsafe { std::mem::transmute::<&[u32; 16], &[u64; 8]>(&b) }; + b[4] +} + +pub fn shabal256_hash_fast(data: &[u8], term: &[u32; 16]) -> [u8; 32] { + let mut a = A_INIT; + let mut b = B_INIT; + let mut c = C_INIT; + let mut w_high = 0u32; + let mut w_low = 1u32; + let mut num = data.len() >> 6; + let mut ptr = 0; + let data_ptr = data.as_ptr() as *const u32; + let data = unsafe { from_raw_parts(data_ptr, data.len() / 4) }; + + while num > 0 { + input_block_add(&mut b, &data[ptr..]); + xor_w(&mut a, w_low, w_high); + apply_p(&mut a, &mut b, &c, &data[ptr..]); + input_block_sub(&mut c, &data[ptr..]); + swap_bc(&mut b, &mut c); + incr_w(&mut w_low, &mut w_high); + ptr = ptr.wrapping_add(16); + num = num.wrapping_sub(1); + } + input_block_add(&mut b, term); + xor_w(&mut a, w_low, w_high); + apply_p(&mut a, &mut b, &c, term); + for _ in 0..3 { + swap_bc(&mut b, &mut c); + xor_w(&mut a, w_low, w_high); + apply_p(&mut a, &mut b, &c, term); + } + unsafe { *(b[8..16].as_ptr() as *const [u8; 32]) } +} + +#[inline(always)] +fn input_block_add(b: &mut [u32; 16], data: &[u32]) { + for (element, data) in b.iter_mut().zip(data.iter()) { + *element = element.wrapping_add(*data); + } +} + +#[inline(always)] +fn input_block_add_dl(b: &mut [u32; 16], data_a: &[u32], data_b: &[u32]) { + unsafe { + *b.get_unchecked_mut(0) = b + .get_unchecked_mut(0) + .wrapping_add(*data_a.get_unchecked(0)); + *b.get_unchecked_mut(1) = b + .get_unchecked_mut(1) + .wrapping_add(*data_a.get_unchecked(1)); + *b.get_unchecked_mut(2) = b + .get_unchecked_mut(2) + .wrapping_add(*data_a.get_unchecked(2)); + *b.get_unchecked_mut(3) = b + .get_unchecked_mut(3) + .wrapping_add(*data_a.get_unchecked(3)); + *b.get_unchecked_mut(4) = b + .get_unchecked_mut(4) + .wrapping_add(*data_a.get_unchecked(4)); + *b.get_unchecked_mut(5) = b + .get_unchecked_mut(5) + .wrapping_add(*data_a.get_unchecked(5)); + *b.get_unchecked_mut(6) = b + .get_unchecked_mut(6) + .wrapping_add(*data_a.get_unchecked(6)); + *b.get_unchecked_mut(7) = b + .get_unchecked_mut(7) + .wrapping_add(*data_a.get_unchecked(7)); + *b.get_unchecked_mut(8) = b + .get_unchecked_mut(8) + .wrapping_add(*data_b.get_unchecked(0)); + *b.get_unchecked_mut(9) = b + .get_unchecked_mut(9) + .wrapping_add(*data_b.get_unchecked(1)); + *b.get_unchecked_mut(10) = b + .get_unchecked_mut(10) + .wrapping_add(*data_b.get_unchecked(2)); + *b.get_unchecked_mut(11) = b + .get_unchecked_mut(11) + .wrapping_add(*data_b.get_unchecked(3)); + *b.get_unchecked_mut(12) = b + .get_unchecked_mut(12) + .wrapping_add(*data_b.get_unchecked(4)); + *b.get_unchecked_mut(13) = b + .get_unchecked_mut(13) + .wrapping_add(*data_b.get_unchecked(5)); + *b.get_unchecked_mut(14) = b + .get_unchecked_mut(14) + .wrapping_add(*data_b.get_unchecked(6)); + *b.get_unchecked_mut(15) = b + .get_unchecked_mut(15) + .wrapping_add(*data_b.get_unchecked(7)); + } +} + +#[inline(always)] +fn input_block_sub(c: &mut [u32; 16], data: &[u32]) { + for (element, data) in c.iter_mut().zip(data.iter()) { + *element = element.wrapping_sub(*data); + } +} + +#[inline(always)] +fn input_block_sub_dl(b: &mut [u32; 16], data_a: &[u32], data_b: &[u32]) { + unsafe { + *b.get_unchecked_mut(0) = b + .get_unchecked_mut(0) + .wrapping_sub(*data_a.get_unchecked(0)); + *b.get_unchecked_mut(1) = b + .get_unchecked_mut(1) + .wrapping_sub(*data_a.get_unchecked(1)); + *b.get_unchecked_mut(2) = b + .get_unchecked_mut(2) + .wrapping_sub(*data_a.get_unchecked(2)); + *b.get_unchecked_mut(3) = b + .get_unchecked_mut(3) + .wrapping_sub(*data_a.get_unchecked(3)); + *b.get_unchecked_mut(4) = b + .get_unchecked_mut(4) + .wrapping_sub(*data_a.get_unchecked(4)); + *b.get_unchecked_mut(5) = b + .get_unchecked_mut(5) + .wrapping_sub(*data_a.get_unchecked(5)); + *b.get_unchecked_mut(6) = b + .get_unchecked_mut(6) + .wrapping_sub(*data_a.get_unchecked(6)); + *b.get_unchecked_mut(7) = b + .get_unchecked_mut(7) + .wrapping_sub(*data_a.get_unchecked(7)); + *b.get_unchecked_mut(8) = b + .get_unchecked_mut(8) + .wrapping_sub(*data_b.get_unchecked(0)); + *b.get_unchecked_mut(9) = b + .get_unchecked_mut(9) + .wrapping_sub(*data_b.get_unchecked(1)); + *b.get_unchecked_mut(10) = b + .get_unchecked_mut(10) + .wrapping_sub(*data_b.get_unchecked(2)); + *b.get_unchecked_mut(11) = b + .get_unchecked_mut(11) + .wrapping_sub(*data_b.get_unchecked(3)); + *b.get_unchecked_mut(12) = b + .get_unchecked_mut(12) + .wrapping_sub(*data_b.get_unchecked(4)); + *b.get_unchecked_mut(13) = b + .get_unchecked_mut(13) + .wrapping_sub(*data_b.get_unchecked(5)); + *b.get_unchecked_mut(14) = b + .get_unchecked_mut(14) + .wrapping_sub(*data_b.get_unchecked(6)); + *b.get_unchecked_mut(15) = b + .get_unchecked_mut(15) + .wrapping_sub(*data_b.get_unchecked(7)); + } +} + +#[inline(always)] +fn xor_w(a: &mut [u32; 12], w_low: u32, w_high: u32) { + a[0] ^= w_low; + a[1] ^= w_high; +} + +#[inline(always)] +fn apply_p(a: &mut [u32; 12], b: &mut [u32; 16], c: &[u32; 16], data: &[u32]) { + for element in b.iter_mut() { + *element = element.wrapping_shl(17) | element.wrapping_shr(15); + } + perm(a, b, c, data); + a[0] = a[0] + .wrapping_add(c[11]) + .wrapping_add(c[15]) + .wrapping_add(c[3]); + a[1] = a[1] + .wrapping_add(c[12]) + .wrapping_add(c[0]) + .wrapping_add(c[4]); + a[2] = a[2] + .wrapping_add(c[13]) + .wrapping_add(c[1]) + .wrapping_add(c[5]); + a[3] = a[3] + .wrapping_add(c[14]) + .wrapping_add(c[2]) + .wrapping_add(c[6]); + a[4] = a[4] + .wrapping_add(c[15]) + .wrapping_add(c[3]) + .wrapping_add(c[7]); + a[5] = a[5] + .wrapping_add(c[0]) + .wrapping_add(c[4]) + .wrapping_add(c[8]); + a[6] = a[6] + .wrapping_add(c[1]) + .wrapping_add(c[5]) + .wrapping_add(c[9]); + a[7] = a[7] + .wrapping_add(c[2]) + .wrapping_add(c[6]) + .wrapping_add(c[10]); + a[8] = a[8] + .wrapping_add(c[3]) + .wrapping_add(c[7]) + .wrapping_add(c[11]); + a[9] = a[9] + .wrapping_add(c[4]) + .wrapping_add(c[8]) + .wrapping_add(c[12]); + a[10] = a[10] + .wrapping_add(c[5]) + .wrapping_add(c[9]) + .wrapping_add(c[13]); + a[11] = a[11] + .wrapping_add(c[6]) + .wrapping_add(c[10]) + .wrapping_add(c[14]); +} + +#[inline(always)] +fn apply_p_dl(a: &mut [u32; 12], b: &mut [u32; 16], c: &[u32; 16], data_a: &[u32], data_b: &[u32]) { + for element in b.iter_mut() { + *element = element.wrapping_shl(17) | element.wrapping_shr(15); + } + perm_dl(a, b, c, data_a, data_b); + a[0] = a[0] + .wrapping_add(c[11]) + .wrapping_add(c[15]) + .wrapping_add(c[3]); + a[1] = a[1] + .wrapping_add(c[12]) + .wrapping_add(c[0]) + .wrapping_add(c[4]); + a[2] = a[2] + .wrapping_add(c[13]) + .wrapping_add(c[1]) + .wrapping_add(c[5]); + a[3] = a[3] + .wrapping_add(c[14]) + .wrapping_add(c[2]) + .wrapping_add(c[6]); + a[4] = a[4] + .wrapping_add(c[15]) + .wrapping_add(c[3]) + .wrapping_add(c[7]); + a[5] = a[5] + .wrapping_add(c[0]) + .wrapping_add(c[4]) + .wrapping_add(c[8]); + a[6] = a[6] + .wrapping_add(c[1]) + .wrapping_add(c[5]) + .wrapping_add(c[9]); + a[7] = a[7] + .wrapping_add(c[2]) + .wrapping_add(c[6]) + .wrapping_add(c[10]); + a[8] = a[8] + .wrapping_add(c[3]) + .wrapping_add(c[7]) + .wrapping_add(c[11]); + a[9] = a[9] + .wrapping_add(c[4]) + .wrapping_add(c[8]) + .wrapping_add(c[12]); + a[10] = a[10] + .wrapping_add(c[5]) + .wrapping_add(c[9]) + .wrapping_add(c[13]); + a[11] = a[11] + .wrapping_add(c[6]) + .wrapping_add(c[10]) + .wrapping_add(c[14]); +} + +#[inline(always)] +fn perm_elt( + a: &mut [u32; 12], + b: &mut [u32; 16], + xa0: usize, + xa1: usize, + xb0: usize, + xb1: usize, + xb2: usize, + xb3: usize, + xc: u32, + xm: u32, +) { + unsafe { + *a.get_unchecked_mut(xa0) = (a.get_unchecked(xa0) + ^ ((a.get_unchecked(xa1).wrapping_shl(15u32) + | a.get_unchecked(xa1).wrapping_shr(17u32)) + .wrapping_mul(5u32)) + ^ xc) + .wrapping_mul(3u32) + ^ b.get_unchecked(xb1) + ^ (b.get_unchecked(xb2) & !b.get_unchecked(xb3)) + ^ xm; + *b.get_unchecked_mut(xb0) = !((b.get_unchecked(xb0).wrapping_shl(1) + | b.get_unchecked(xb0).wrapping_shr(31)) + ^ a.get_unchecked(xa0)); + } +} + +#[inline(always)] +fn perm(a: &mut [u32; 12], b: &mut [u32; 16], c: &[u32; 16], data: &[u32]) { + unsafe { + perm_elt(a, b, 0, 11, 0, 13, 9, 6, c[8], *data.get_unchecked(0)); + perm_elt(a, b, 1, 0, 1, 14, 10, 7, c[7], *data.get_unchecked(1)); + perm_elt(a, b, 2, 1, 2, 15, 11, 8, c[6], *data.get_unchecked(2)); + perm_elt(a, b, 3, 2, 3, 0, 12, 9, c[5], *data.get_unchecked(3)); + perm_elt(a, b, 4, 3, 4, 1, 13, 10, c[4], *data.get_unchecked(4)); + perm_elt(a, b, 5, 4, 5, 2, 14, 11, c[3], *data.get_unchecked(5)); + perm_elt(a, b, 6, 5, 6, 3, 15, 12, c[2], *data.get_unchecked(6)); + perm_elt(a, b, 7, 6, 7, 4, 0, 13, c[1], *data.get_unchecked(7)); + perm_elt(a, b, 8, 7, 8, 5, 1, 14, c[0], *data.get_unchecked(8)); + perm_elt(a, b, 9, 8, 9, 6, 2, 15, c[15], *data.get_unchecked(9)); + perm_elt(a, b, 10, 9, 10, 7, 3, 0, c[14], *data.get_unchecked(10)); + perm_elt(a, b, 11, 10, 11, 8, 4, 1, c[13], *data.get_unchecked(11)); + perm_elt(a, b, 0, 11, 12, 9, 5, 2, c[12], *data.get_unchecked(12)); + perm_elt(a, b, 1, 0, 13, 10, 6, 3, c[11], *data.get_unchecked(13)); + perm_elt(a, b, 2, 1, 14, 11, 7, 4, c[10], *data.get_unchecked(14)); + perm_elt(a, b, 3, 2, 15, 12, 8, 5, c[9], *data.get_unchecked(15)); + perm_elt(a, b, 4, 3, 0, 13, 9, 6, c[8], *data.get_unchecked(0)); + perm_elt(a, b, 5, 4, 1, 14, 10, 7, c[7], *data.get_unchecked(1)); + perm_elt(a, b, 6, 5, 2, 15, 11, 8, c[6], *data.get_unchecked(2)); + perm_elt(a, b, 7, 6, 3, 0, 12, 9, c[5], *data.get_unchecked(3)); + perm_elt(a, b, 8, 7, 4, 1, 13, 10, c[4], *data.get_unchecked(4)); + perm_elt(a, b, 9, 8, 5, 2, 14, 11, c[3], *data.get_unchecked(5)); + perm_elt(a, b, 10, 9, 6, 3, 15, 12, c[2], *data.get_unchecked(6)); + perm_elt(a, b, 11, 10, 7, 4, 0, 13, c[1], *data.get_unchecked(7)); + perm_elt(a, b, 0, 11, 8, 5, 1, 14, c[0], *data.get_unchecked(8)); + perm_elt(a, b, 1, 0, 9, 6, 2, 15, c[15], *data.get_unchecked(9)); + perm_elt(a, b, 2, 1, 10, 7, 3, 0, c[14], *data.get_unchecked(10)); + perm_elt(a, b, 3, 2, 11, 8, 4, 1, c[13], *data.get_unchecked(11)); + perm_elt(a, b, 4, 3, 12, 9, 5, 2, c[12], *data.get_unchecked(12)); + perm_elt(a, b, 5, 4, 13, 10, 6, 3, c[11], *data.get_unchecked(13)); + perm_elt(a, b, 6, 5, 14, 11, 7, 4, c[10], *data.get_unchecked(14)); + perm_elt(a, b, 7, 6, 15, 12, 8, 5, c[9], *data.get_unchecked(15)); + perm_elt(a, b, 8, 7, 0, 13, 9, 6, c[8], *data.get_unchecked(0)); + perm_elt(a, b, 9, 8, 1, 14, 10, 7, c[7], *data.get_unchecked(1)); + perm_elt(a, b, 10, 9, 2, 15, 11, 8, c[6], *data.get_unchecked(2)); + perm_elt(a, b, 11, 10, 3, 0, 12, 9, c[5], *data.get_unchecked(3)); + perm_elt(a, b, 0, 11, 4, 1, 13, 10, c[4], *data.get_unchecked(4)); + perm_elt(a, b, 1, 0, 5, 2, 14, 11, c[3], *data.get_unchecked(5)); + perm_elt(a, b, 2, 1, 6, 3, 15, 12, c[2], *data.get_unchecked(6)); + perm_elt(a, b, 3, 2, 7, 4, 0, 13, c[1], *data.get_unchecked(7)); + perm_elt(a, b, 4, 3, 8, 5, 1, 14, c[0], *data.get_unchecked(8)); + perm_elt(a, b, 5, 4, 9, 6, 2, 15, c[15], *data.get_unchecked(9)); + perm_elt(a, b, 6, 5, 10, 7, 3, 0, c[14], *data.get_unchecked(10)); + perm_elt(a, b, 7, 6, 11, 8, 4, 1, c[13], *data.get_unchecked(11)); + perm_elt(a, b, 8, 7, 12, 9, 5, 2, c[12], *data.get_unchecked(12)); + perm_elt(a, b, 9, 8, 13, 10, 6, 3, c[11], *data.get_unchecked(13)); + perm_elt(a, b, 10, 9, 14, 11, 7, 4, c[10], *data.get_unchecked(14)); + perm_elt(a, b, 11, 10, 15, 12, 8, 5, c[9], *data.get_unchecked(15)); + } +} + +#[inline(always)] +fn perm_dl(a: &mut [u32; 12], b: &mut [u32; 16], c: &[u32; 16], data_a: &[u32], data_b: &[u32]) { + unsafe { + perm_elt(a, b, 0, 11, 0, 13, 9, 6, c[8], *data_a.get_unchecked(0)); + perm_elt(a, b, 1, 0, 1, 14, 10, 7, c[7], *data_a.get_unchecked(1)); + perm_elt(a, b, 2, 1, 2, 15, 11, 8, c[6], *data_a.get_unchecked(2)); + perm_elt(a, b, 3, 2, 3, 0, 12, 9, c[5], *data_a.get_unchecked(3)); + perm_elt(a, b, 4, 3, 4, 1, 13, 10, c[4], *data_a.get_unchecked(4)); + perm_elt(a, b, 5, 4, 5, 2, 14, 11, c[3], *data_a.get_unchecked(5)); + perm_elt(a, b, 6, 5, 6, 3, 15, 12, c[2], *data_a.get_unchecked(6)); + perm_elt(a, b, 7, 6, 7, 4, 0, 13, c[1], *data_a.get_unchecked(7)); + perm_elt(a, b, 8, 7, 8, 5, 1, 14, c[0], *data_b.get_unchecked(0)); + perm_elt(a, b, 9, 8, 9, 6, 2, 15, c[15], *data_b.get_unchecked(1)); + perm_elt(a, b, 10, 9, 10, 7, 3, 0, c[14], *data_b.get_unchecked(2)); + perm_elt(a, b, 11, 10, 11, 8, 4, 1, c[13], *data_b.get_unchecked(3)); + perm_elt(a, b, 0, 11, 12, 9, 5, 2, c[12], *data_b.get_unchecked(4)); + perm_elt(a, b, 1, 0, 13, 10, 6, 3, c[11], *data_b.get_unchecked(5)); + perm_elt(a, b, 2, 1, 14, 11, 7, 4, c[10], *data_b.get_unchecked(6)); + perm_elt(a, b, 3, 2, 15, 12, 8, 5, c[9], *data_b.get_unchecked(7)); + perm_elt(a, b, 4, 3, 0, 13, 9, 6, c[8], *data_a.get_unchecked(0)); + perm_elt(a, b, 5, 4, 1, 14, 10, 7, c[7], *data_a.get_unchecked(1)); + perm_elt(a, b, 6, 5, 2, 15, 11, 8, c[6], *data_a.get_unchecked(2)); + perm_elt(a, b, 7, 6, 3, 0, 12, 9, c[5], *data_a.get_unchecked(3)); + perm_elt(a, b, 8, 7, 4, 1, 13, 10, c[4], *data_a.get_unchecked(4)); + perm_elt(a, b, 9, 8, 5, 2, 14, 11, c[3], *data_a.get_unchecked(5)); + perm_elt(a, b, 10, 9, 6, 3, 15, 12, c[2], *data_a.get_unchecked(6)); + perm_elt(a, b, 11, 10, 7, 4, 0, 13, c[1], *data_a.get_unchecked(7)); + perm_elt(a, b, 0, 11, 8, 5, 1, 14, c[0], *data_b.get_unchecked(0)); + perm_elt(a, b, 1, 0, 9, 6, 2, 15, c[15], *data_b.get_unchecked(1)); + perm_elt(a, b, 2, 1, 10, 7, 3, 0, c[14], *data_b.get_unchecked(2)); + perm_elt(a, b, 3, 2, 11, 8, 4, 1, c[13], *data_b.get_unchecked(3)); + perm_elt(a, b, 4, 3, 12, 9, 5, 2, c[12], *data_b.get_unchecked(4)); + perm_elt(a, b, 5, 4, 13, 10, 6, 3, c[11], *data_b.get_unchecked(5)); + perm_elt(a, b, 6, 5, 14, 11, 7, 4, c[10], *data_b.get_unchecked(6)); + perm_elt(a, b, 7, 6, 15, 12, 8, 5, c[9], *data_b.get_unchecked(7)); + perm_elt(a, b, 8, 7, 0, 13, 9, 6, c[8], *data_a.get_unchecked(0)); + perm_elt(a, b, 9, 8, 1, 14, 10, 7, c[7], *data_a.get_unchecked(1)); + perm_elt(a, b, 10, 9, 2, 15, 11, 8, c[6], *data_a.get_unchecked(2)); + perm_elt(a, b, 11, 10, 3, 0, 12, 9, c[5], *data_a.get_unchecked(3)); + perm_elt(a, b, 0, 11, 4, 1, 13, 10, c[4], *data_a.get_unchecked(4)); + perm_elt(a, b, 1, 0, 5, 2, 14, 11, c[3], *data_a.get_unchecked(5)); + perm_elt(a, b, 2, 1, 6, 3, 15, 12, c[2], *data_a.get_unchecked(6)); + perm_elt(a, b, 3, 2, 7, 4, 0, 13, c[1], *data_a.get_unchecked(7)); + perm_elt(a, b, 4, 3, 8, 5, 1, 14, c[0], *data_b.get_unchecked(0)); + perm_elt(a, b, 5, 4, 9, 6, 2, 15, c[15], *data_b.get_unchecked(1)); + perm_elt(a, b, 6, 5, 10, 7, 3, 0, c[14], *data_b.get_unchecked(2)); + perm_elt(a, b, 7, 6, 11, 8, 4, 1, c[13], *data_b.get_unchecked(3)); + perm_elt(a, b, 8, 7, 12, 9, 5, 2, c[12], *data_b.get_unchecked(4)); + perm_elt(a, b, 9, 8, 13, 10, 6, 3, c[11], *data_b.get_unchecked(5)); + perm_elt(a, b, 10, 9, 14, 11, 7, 4, c[10], *data_b.get_unchecked(6)); + perm_elt(a, b, 11, 10, 15, 12, 8, 5, c[9], *data_b.get_unchecked(7)); + } +} + +#[inline(always)] +fn swap_bc(b: &mut [u32; 16], c: &mut [u32; 16]) { + std::mem::swap(b, c); +} + +#[inline(always)] +fn incr_w(w_low: &mut u32, w_high: &mut u32) { + *w_low = w_low.wrapping_add(1); + if *w_low == 0 { + *w_high = w_high.wrapping_add(1); + } +} + +#[cfg(test)] +mod test { + use super::*; + const TEST_A_RESULT: [u8; 32] = [ + 0xDA, 0x8F, 0x08, 0xC0, 0x2A, 0x67, 0xBA, 0x9A, 0x56, 0xBD, 0xD0, 0x79, 0x8E, 0x48, 0xAE, + 0x07, 0x14, 0x21, 0x5E, 0x09, 0x3B, 0x5B, 0x85, 0x06, 0x49, 0xA3, 0x77, 0x18, 0x99, 0x3F, + 0x54, 0xA2, + ]; + const TEST_B_RESULT: [u8; 32] = [ + 0xB4, 0x9F, 0x34, 0xBF, 0x51, 0x86, 0x4C, 0x30, 0x53, 0x3C, 0xC4, 0x6C, 0xC2, 0x54, 0x2B, + 0xDE, 0xC2, 0xF9, 0x6F, 0xD0, 0x6F, 0x5C, 0x53, 0x9A, 0xFF, 0x6E, 0xAD, 0x58, 0x83, 0xF7, + 0x32, 0x7A, + ]; + const TEST_B_M1: [u32; 16] = [ + 0x64636261, 0x68676665, 0x6C6B6A69, 0x706F6E6D, 0x74737271, 0x78777675, 0x302D7A79, + 0x34333231, 0x38373635, 0x42412D39, 0x46454443, 0x4A494847, 0x4E4D4C4B, 0x5251504F, + 0x56555453, 0x5A595857, + ]; + const TEST_B_M2: [u32; 16] = [ + 0x3231302D, 0x36353433, 0x2D393837, 0x64636261, 0x68676665, 0x6C6B6A69, 0x706F6E6D, + 0x74737271, 0x78777675, 0x00807A79, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, + ]; + + #[test] + fn shabal256() { + // test message A + let test_data = [0u8; 64]; + let mut test_term = [0u32; 16]; + test_term[0] = 0x80; + let hash_a = shabal256_hash_fast(&test_data, &test_term); + assert_eq!(hash_a, TEST_A_RESULT); + // test message B + let hash_b = unsafe { + shabal256_hash_fast( + &std::mem::transmute::<[u32; 16], [u8; 64]>(TEST_B_M1), + &TEST_B_M2, + ) + }; + assert_eq!(hash_b, TEST_B_RESULT); + } +} diff --git a/src/shabals.rs b/src/shabals.rs deleted file mode 100644 index 90edf1e..0000000 --- a/src/shabals.rs +++ /dev/null @@ -1,75 +0,0 @@ -#![allow(dead_code)] - -use libc::{c_uchar, c_uint, c_void, size_t, uint32_t}; - -use std::mem::zeroed; - -#[repr(C)] -#[allow(non_snake_case)] -pub struct ShabalContext { - pub buf: [c_uchar; 64usize], - pub ptr: size_t, - pub state: [uint32_t; 12 + 16 + 16], - pub Wlow: uint32_t, - pub Whigh: uint32_t, - pub out_size: uint32_t, -} - -impl ::std::default::Default for ShabalContext { - fn default() -> Self { - unsafe { zeroed() } - } -} - -pub fn to_void_raw_ctx(cc: &mut T) -> *mut c_void { - let raw_cc = cc as *mut T; - raw_cc as *mut c_void -} - -pub fn to_void_raw_data(data: &[u8]) -> (*const c_void, size_t) { - let void_raw_data = data.as_ptr() as *const c_void; - let len = data.len() as size_t; - - (void_raw_data, len) -} - -pub fn to_void_raw_dest(dest: &mut [u8]) -> *mut c_void { - let raw_dest = dest as *mut [u8]; - raw_dest as *mut c_void -} - -extern "C" { - pub fn sph_shabal256_init(cc: *mut c_void, out_size: c_uint) -> (); - pub fn sph_shabal256(cc: *mut c_void, data: *const c_void, len: size_t) -> (); - pub fn sph_shabal256_close(cc: *mut c_void, dst: *mut c_void) -> (); -} - -pub fn shabal256_init(cc: &mut ShabalContext) { - let void_raw_cc = to_void_raw_ctx(cc); - unsafe { sph_shabal256_init(void_raw_cc, 256) }; -} - -pub fn shabal256_load(cc: &mut ShabalContext, data: &[u8]) { - let void_raw_cc = to_void_raw_ctx(cc); - let (void_raw_data, len) = to_void_raw_data(data); - unsafe { sph_shabal256(void_raw_cc, void_raw_data, len) }; -} - -pub fn shabal256_close(cc: &mut ShabalContext, dest: &mut [u8; 32]) { - let void_raw_cc = to_void_raw_ctx(cc); - let void_raw_dest = to_void_raw_dest(dest); - unsafe { - sph_shabal256_close(void_raw_cc, void_raw_dest); - }; -} - -pub fn shabal256(data: &[u8]) -> [u8; 32] { - let mut dest = [0; 32]; - let mut cc = ShabalContext::default(); - - shabal256_init(&mut cc); - shabal256_load(&mut cc, data); - shabal256_close(&mut cc, &mut dest); - - dest -}