/*
 * Copyright (c) 2018-2020 Atmosphère-NX
 * Copyright (c) 2019-2022 shchmue
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "gmac.h"

/*
 * NOTE(review): the four angle-bracket header names were missing from this
 * copy of the file. They are restored here from the symbols used below
 * (se_aes_* / ENCRYPT, uint64_t, memcpy) -- confirm against the project tree.
 */
#include <sec/se.h>
#include <sec/se_t210.h>

#include <stdint.h>
#include <string.h>

/* Size in bytes of one GHASH / AES block. */
enum { GF128_BLOCK_BYTES = 0x10 };

/* Reduction byte 0xE1 positioned in the top byte of the high 64-bit word. */
static const uint64_t gf128_reduction_hi = (uint64_t)0xE1 << 56;

/* Reads 8 bytes as a big-endian 64-bit integer; no alignment requirement. */
static uint64_t _load_be64(const uint8_t *src)
{
    uint64_t value = 0;

    for (unsigned int i = 0; i < 8; i++) {
        value = (value << 8) | src[i];
    }

    return value;
}

/* Writes a 64-bit integer as 8 big-endian bytes; no alignment requirement. */
static void _store_be64(uint8_t *dst, uint64_t value)
{
    for (unsigned int i = 0; i < 8; i++) {
        dst[i] = (uint8_t)(value >> (56 - 8 * i));
    }
}

/*
 * Shifts right by one bit a 128-bit value stored as two 64-bit words,
 * val[0] being the low word and val[1] the high word.
 */
static void _shr_128(uint64_t *val)
{
    val[0] >>= 1;
    val[0] |= (val[1] & 1) << 63;
    val[1] >>= 1;
}

/*
 * Shifts left by one bit a 128-bit value stored as two 64-bit words,
 * val[0] being the low word and val[1] the high word.
 */
static void _shl_128(uint64_t *val)
{
    val[1] <<= 1;
    val[1] |= (val[0] & (1ull << 63)) >> 63;
    val[0] <<= 1;
}

/*
 * Multiplies two 128-bit numbers X,Y in the GF(2^128) Galois Field.
 *
 * dst, x and y each point to 0x10 bytes. dst may alias x and/or y: both
 * inputs are fully loaded into local words before dst is written.
 *
 * The operands are held in genuine uint64_t words (loaded byte by byte)
 * instead of reinterpreting byte buffers through uint64_t pointers, so the
 * routine has no alignment requirement and does not depend on host byte order.
 */
static void _gf128_mul(uint8_t *dst, const uint8_t *x, const uint8_t *y)
{
    /* Index 0 is the low word, index 1 the high word; byte 0 of the input is the most significant. */
    uint64_t x_work[2]   = { _load_be64(x + 8), _load_be64(x) };
    uint64_t y_work[2]   = { _load_be64(y + 8), _load_be64(y) };
    uint64_t dst_work[2] = { 0, 0 };

    /* Perform operation for each bit in y, most significant bit first. */
    for (unsigned int round = 0; round < 0x80; round++) {
        /* All-ones when the current top bit of y is set, zero otherwise (branch-free select). */
        const uint64_t take = (uint64_t)0 - (y_work[1] >> 63);

        dst_work[0] ^= x_work[0] & take;
        dst_work[1] ^= x_work[1] & take;
        _shl_128(y_work);

        /* If a bit is about to fall off the bottom of x, fold it back in with the reduction byte. */
        const uint64_t reduce = ((uint64_t)0 - (x_work[0] & 1)) & gf128_reduction_hi;

        _shr_128(x_work);
        x_work[1] ^= reduce;
    }

    _store_be64(dst, dst_work[1]);
    _store_be64(dst + 8, dst_work[0]);
}

/*
 * GHASH variant reproducing Nintendo's implementation, quirks included.
 *
 * ks       - AES keyslot handed to se_aes_crypt_block_ecb.
 * dst      - receives the 0x10-byte result.
 * src      - input data, src_size bytes; read byte-wise, so any alignment works.
 * src_size - input length in bytes.
 * j_block  - 0x10-byte J block; only read when encrypt is true.
 * encrypt  - true for the "final output block" pass: the length is XORed into
 *            the first half of the state and the result is XORed with the
 *            encrypted J block. false XORs the length into the second half
 *            and skips the J block step.
 */
static void _ghash(u32 ks, void *dst, const void *src, u32 src_size, const void *j_block, bool encrypt)
{
    uint8_t x[GF128_BLOCK_BYTES] = {0};
    uint8_t h[GF128_BLOCK_BYTES];
    const uint8_t *p_data = (const uint8_t *)src;

    /* Bit length of the whole input, captured before src_size is consumed. */
    const uint64_t total_bits = (uint64_t)src_size << 3;

    /* H = aes_ecb_encrypt(zeroes) */
    se_aes_crypt_block_ecb(ks, ENCRYPT, h, x);

    while (src_size >= GF128_BLOCK_BYTES) {
        /* X = (X ^ current_block) * H */
        for (unsigned int i = 0; i < GF128_BLOCK_BYTES; i++) {
            x[i] ^= p_data[i];
        }
        _gf128_mul(x, x, h);

        /* Advance to the next 0x10-byte block. */
        p_data += GF128_BLOCK_BYTES;
        src_size -= GF128_BLOCK_BYTES;
    }

    /* Nintendo's code *discards all data in the last block* if unaligned. */
    /* And treats that block as though it were all-zero. */
    /* This is a bug, they just forget to XOR with the copy of the last block they save. */
    if (src_size & 0xF) {
        _gf128_mul(x, x, h);
    }

    /* Due to a Nintendo bug, the wrong QWORD gets XOR'd in the "final output block" case. */
    uint8_t *len_half = encrypt ? &x[0] : &x[8];

    /* XOR in the bit length as 8 big-endian bytes. */
    for (unsigned int i = 0; i < 8; i++) {
        len_half[i] ^= (uint8_t)(total_bits >> (56 - 8 * i));
    }

    _gf128_mul(x, x, h);

    /* If final output block, XOR with encrypted J block. */
    if (encrypt) {
        se_aes_crypt_block_ecb(ks, ENCRYPT, h, j_block);
        for (unsigned int i = 0; i < GF128_BLOCK_BYTES; i++) {
            x[i] ^= h[i];
        }
    }

    /* Copy output. */
    memcpy(dst, x, GF128_BLOCK_BYTES);
}

/*
 * Computes a 0x10-byte GMAC over data.
 *
 * ks       - AES keyslot to load the key into and use for all AES operations.
 * out_gmac - receives the 0x10-byte tag.
 * data     - message to authenticate, size bytes.
 * size     - message length in bytes.
 * key      - AES key; 0x10 bytes are passed to se_aes_key_set.
 * iv       - 0x10-byte IV, hashed to derive the J block.
 */
void calc_gmac(u32 ks, void *out_gmac, const void *data, u32 size, const void *key, const void *iv)
{
    u32 j_block[4] = {0};

    se_aes_key_set(ks, key, 0x10);

    /* First pass derives the J block from the IV; second pass produces the tag. */
    _ghash(ks, j_block, iv, 0x10, NULL, false);
    _ghash(ks, out_gmac, data, size, j_block, true);
}