/* * Copyright 2018-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include namespace folly { // Standard galois-field multiply. The only modification is that a, // b, m, and p are all bit-reflected. // // https://en.wikipedia.org/wiki/Finite_field_arithmetic static constexpr uint32_t gf_multiply_sw_1(size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) { // clang-format off return i == 32 ? p : gf_multiply_sw_1( /* i = */ i + 1, /* p = */ p ^ (-((b >> 31) & 1) & a), /* a = */ (a >> 1) ^ (-(a & 1) & m), /* b = */ b << 1, /* m = */ m); // clang-format on } static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) { return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m); } static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) { return gf_multiply_sw(a, a, m); } namespace { template struct gf_powers_memo { static constexpr uint32_t value = gf_square_sw(gf_powers_memo::value, m); }; template struct gf_powers_memo<0, m> { static constexpr uint32_t value = m; }; template struct gf_powers_make { template constexpr auto operator()(index_sequence) const { return std::array{{gf_powers_memo::value...}}; } }; } // namespace #if FOLLY_SSE_PREREQ(4, 2) // Reduction taken from // https://www.nicst.de/crc.pdf // // This is an intrinsics-based implementation of listing 3. static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) { const auto crc1_xmm = _mm_set_epi64x(0, crc1); const auto crc2_xmm = _mm_set_epi64x(0, crc2); const auto count = _mm_set_epi64x(0, 1); const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00); const auto res1 = _mm_sll_epi64(res0, count); // Use hardware crc32c to do reduction from 64 -> 32 bytes const auto res2 = _mm_cvtsi128_si64(res1); const auto res3 = _mm_crc32_u32(0, res2); const auto res4 = _mm_extract_epi32(res1, 1); return res3 ^ res4; } static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) { const auto crc1_xmm = _mm_set_epi64x(0, crc1); const auto crc2_xmm = _mm_set_epi64x(0, crc2); const auto count = _mm_set_epi64x(0, 1); const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00); const auto res1 = _mm_sll_epi64(res0, count); // Do barrett reduction of 64 -> 32 bytes const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF); const auto barrett_reduction_constants = _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641); const auto res2 = _mm_clmulepi64_si128( _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00); const auto res3 = _mm_clmulepi64_si128( _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10); return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4)); } #else static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) { return 0; } static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) { return 0; } #endif static constexpr uint32_t crc32c_m = 0x82f63b78; static constexpr uint32_t crc32_m = 0xedb88320; /* * Pre-calculated powers tables for crc32c and crc32. */ static constexpr std::array const crc32c_powers = gf_powers_make{}(make_index_sequence<62>{}); static constexpr std::array const crc32_powers = gf_powers_make{}(make_index_sequence<62>{}); template static uint32_t crc32_append_zeroes( F mult, uint32_t crc, size_t len, uint32_t polynomial, std::array const& powers_array) { auto powers = powers_array.data(); // Append by multiplying by consecutive powers of two of the zeroes // array len >>= 2; while (len) { // Advance directly to next bit set. auto r = findFirstSet(len) - 1; len >>= r; powers += r; crc = mult(crc, *powers, polynomial); len >>= 1; powers++; } return crc; } namespace detail { uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) { return crc2 ^ crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers); } uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) { return crc2 ^ crc32_append_zeroes( gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers); } uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) { return crc2 ^ crc32_append_zeroes( gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers); } uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) { return crc2 ^ crc32_append_zeroes( gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers); } } // namespace detail } // namespace folly