Use nanors for optimized Reed-Solomon FEC decoding (#125)

This commit is contained in:
Andy Grundman
2026-02-19 00:36:52 -05:00
committed by GitHub
parent 1d0e91d91a
commit de364b6ecd
16 changed files with 5493 additions and 689 deletions

View File

@@ -17,7 +17,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE_BACKUP})
unset(CMAKE_POSITION_INDEPENDENT_CODE_BACKUP)
aux_source_directory(src SRC_LIST)
aux_source_directory(reedsolomon SRC_LIST)
# Build shared library by default, but allows user override
if (NOT DEFINED BUILD_SHARED_LIBS)
@@ -107,7 +106,18 @@ endif()
target_include_directories(moonlight-common-c SYSTEM PUBLIC src)
target_include_directories(moonlight-common-c PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/reedsolomon
${CMAKE_CURRENT_SOURCE_DIR}/nanors
${CMAKE_CURRENT_SOURCE_DIR}/nanors/deps/obl
)
target_compile_definitions(moonlight-common-c PRIVATE HAS_SOCKLEN_T)
# nanors
if (MSVC)
set_source_files_properties("${CMAKE_SOURCE_DIR}/src/rswrapper.c"
DIRECTORY "${CMAKE_SOURCE_DIR}")
else()
set_source_files_properties("${CMAKE_SOURCE_DIR}/src/rswrapper.c"
DIRECTORY "${CMAKE_SOURCE_DIR}"
PROPERTIES COMPILE_FLAGS "-ftree-vectorize -funroll-loops")
endif()

21
nanors/LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Joseph Calderon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,16 @@
#if defined(__AVX512F__)
#define OBLAS_AVX512
#else
#if defined(__AVX2__)
#define OBLAS_AVX2
#else
#if defined(__SSSE3__) || (defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64))
#define OBLAS_SSE3
#else
#if defined(__aarch64__) || (defined(_MSC_VER) && defined(_M_ARM64))
// To be replaced with SIMDe
// #define OBLAS_NEON
#endif
#endif
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,603 @@
/* these tables were generated with polynomial: 285 */
#ifndef GF2_8_TABLES
#define GF2_8_TABLES
/* clang-format off */
static const uint8_t GF2_8_LOG[] =
{
255, 0, 1, 25, 2, 50, 26,198, 3,223, 51,238, 27,104,199, 75,
4,100,224, 14, 52,141,239,129, 28,193,105,248,200, 8, 76,113,
5,138,101, 47,225, 36, 15, 33, 53,147,142,218,240, 18,130, 69,
29,181,194,125,106, 39,249,185,201,154, 9,120, 77,228,114,166,
6,191,139, 98,102,221, 48,253,226,152, 37,179, 16,145, 34,136,
54,208,148,206,143,150,219,189,241,210, 19, 92,131, 56, 70, 64,
30, 66,182,163,195, 72,126,110,107, 58, 40, 84,250,133,186, 61,
202, 94,155,159, 10, 21,121, 43, 78,212,229,172,115,243,167, 87,
7,112,192,247,140,128, 99, 13,103, 74,222,237, 49,197,254, 24,
227,165,153,119, 38,184,180,124, 17, 68,146,217, 35, 32,137, 46,
55, 63,209, 91,149,188,207,205,144,135,151,178,220,252,190, 97,
242, 86,211,171, 20, 42, 93,158,132, 60, 57, 83, 71,109, 65,162,
31, 45, 67,216,183,123,164,118,196, 23, 73,236,127, 12,111,246,
108,161, 59, 82, 41,157, 85,170,251, 96,134,177,187,204, 62, 90,
203, 89, 95,176,156,169,160, 81, 11,245, 22,235,122,117, 44,215,
79,174,213,233,230,231,173,232,116,214,244,234,168, 80, 88,175,
};
static const uint8_t GF2_8_EXP[] =
{
1, 2, 4, 8, 16, 32, 64,128, 29, 58,116,232,205,135, 19, 38,
76,152, 45, 90,180,117,234,201,143, 3, 6, 12, 24, 48, 96,192,
157, 39, 78,156, 37, 74,148, 53,106,212,181,119,238,193,159, 35,
70,140, 5, 10, 20, 40, 80,160, 93,186,105,210,185,111,222,161,
95,190, 97,194,153, 47, 94,188,101,202,137, 15, 30, 60,120,240,
253,231,211,187,107,214,177,127,254,225,223,163, 91,182,113,226,
217,175, 67,134, 17, 34, 68,136, 13, 26, 52,104,208,189,103,206,
129, 31, 62,124,248,237,199,147, 59,118,236,197,151, 51,102,204,
133, 23, 46, 92,184,109,218,169, 79,158, 33, 66,132, 21, 42, 84,
168, 77,154, 41, 82,164, 85,170, 73,146, 57,114,228,213,183,115,
230,209,191, 99,198,145, 63,126,252,229,215,179,123,246,241,255,
227,219,171, 75,150, 49, 98,196,149, 55,110,220,165, 87,174, 65,
130, 25, 50,100,200,141, 7, 14, 28, 56,112,224,221,167, 83,166,
81,162, 89,178,121,242,249,239,195,155, 43, 86,172, 69,138, 9,
18, 36, 72,144, 61,122,244,245,247,243,251,235,203,139, 11, 22,
44, 88,176,125,250,233,207,131, 27, 54,108,216,173, 71,142, 1,
2, 4, 8, 16, 32, 64,128, 29, 58,116,232,205,135, 19, 38, 76,
152, 45, 90,180,117,234,201,143, 3, 6, 12, 24, 48, 96,192,157,
39, 78,156, 37, 74,148, 53,106,212,181,119,238,193,159, 35, 70,
140, 5, 10, 20, 40, 80,160, 93,186,105,210,185,111,222,161, 95,
190, 97,194,153, 47, 94,188,101,202,137, 15, 30, 60,120,240,253,
231,211,187,107,214,177,127,254,225,223,163, 91,182,113,226,217,
175, 67,134, 17, 34, 68,136, 13, 26, 52,104,208,189,103,206,129,
31, 62,124,248,237,199,147, 59,118,236,197,151, 51,102,204,133,
23, 46, 92,184,109,218,169, 79,158, 33, 66,132, 21, 42, 84,168,
77,154, 41, 82,164, 85,170, 73,146, 57,114,228,213,183,115,230,
209,191, 99,198,145, 63,126,252,229,215,179,123,246,241,255,227,
219,171, 75,150, 49, 98,196,149, 55,110,220,165, 87,174, 65,130,
25, 50,100,200,141, 7, 14, 28, 56,112,224,221,167, 83,166, 81,
162, 89,178,121,242,249,239,195,155, 43, 86,172, 69,138, 9, 18,
36, 72,144, 61,122,244,245,247,243,251,235,203,139, 11, 22, 44,
88,176,125,250,233,207,131, 27, 54,108,216,173, 71,142,};
static const uint8_t GF2_8_INV[] =
{
0, 1,142,244, 71,167,122,186,173,157,221,152, 61,170, 93,150,
216,114,192, 88,224, 62, 76,102,144,222, 85,128,160,131, 75, 42,
108,237, 57, 81, 96, 86, 44,138,112,208, 31, 74, 38,139, 51,110,
72,137,111, 46,164,195, 64, 94, 80, 34,207,169,171, 12, 21,225,
54, 95,248,213,146, 78,166, 4, 48,136, 43, 30, 22,103, 69,147,
56, 35,104,140,129, 26, 37, 97, 19,193,203, 99,151, 14, 55, 65,
36, 87,202, 91,185,196, 23, 77, 82,141,239,179, 32,236, 47, 50,
40,209, 17,217,233,251,218,121,219,119, 6,187,132,205,254,252,
27, 84,161, 29,124,204,228,176, 73, 49, 39, 45, 83,105, 2,245,
24,223, 68, 79,155,188, 15, 92, 11,220,189,148,172, 9,199,162,
28,130,159,198, 52,194, 70, 5,206, 59, 13, 60,156, 8,190,183,
135,229,238,107,235,242,191,175,197,100, 7,123,149,154,174,182,
18, 89,165, 53,101,184,163,158,210,247, 98, 90,133,125,168, 58,
41,113,200,246,249, 67,215,214, 16,115,118,120,153, 10, 25,145,
20, 63,230,240,134,177,226,241,250,116,243,180,109, 33,178,106,
227,231,181,234, 3,143,211,201, 66,212,232,117,127,255,126,253,
};
static const uint8_t GF2_8_SHUF_LO[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17,
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
0, 5, 10, 15, 20, 17, 30, 27, 40, 45, 34, 39, 60, 57, 54, 51,
0, 6, 12, 10, 24, 30, 20, 18, 48, 54, 60, 58, 40, 46, 36, 34,
0, 7, 14, 9, 28, 27, 18, 21, 56, 63, 54, 49, 36, 35, 42, 45,
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96,104,112,120,
0, 9, 18, 27, 36, 45, 54, 63, 72, 65, 90, 83,108,101,126,119,
0, 10, 20, 30, 40, 34, 60, 54, 80, 90, 68, 78,120,114,108,102,
0, 11, 22, 29, 44, 39, 58, 49, 88, 83, 78, 69,116,127, 98,105,
0, 12, 24, 20, 48, 60, 40, 36, 96,108,120,116, 80, 92, 72, 68,
0, 13, 26, 23, 52, 57, 46, 35,104,101,114,127, 92, 81, 70, 75,
0, 14, 28, 18, 56, 54, 36, 42,112,126,108, 98, 72, 70, 84, 90,
0, 15, 30, 17, 60, 51, 34, 45,120,119,102,105, 68, 75, 90, 85,
0, 16, 32, 48, 64, 80, 96,112,128,144,160,176,192,208,224,240,
0, 17, 34, 51, 68, 85,102,119,136,153,170,187,204,221,238,255,
0, 18, 36, 54, 72, 90,108,126,144,130,180,166,216,202,252,238,
0, 19, 38, 53, 76, 95,106,121,152,139,190,173,212,199,242,225,
0, 20, 40, 60, 80, 68,120,108,160,180,136,156,240,228,216,204,
0, 21, 42, 63, 84, 65,126,107,168,189,130,151,252,233,214,195,
0, 22, 44, 58, 88, 78,116, 98,176,166,156,138,232,254,196,210,
0, 23, 46, 57, 92, 75,114,101,184,175,150,129,228,243,202,221,
0, 24, 48, 40, 96,120, 80, 72,192,216,240,232,160,184,144,136,
0, 25, 50, 43,100,125, 86, 79,200,209,250,227,172,181,158,135,
0, 26, 52, 46,104,114, 92, 70,208,202,228,254,184,162,140,150,
0, 27, 54, 45,108,119, 90, 65,216,195,238,245,180,175,130,153,
0, 28, 56, 36,112,108, 72, 84,224,252,216,196,144,140,168,180,
0, 29, 58, 39,116,105, 78, 83,232,245,210,207,156,129,166,187,
0, 30, 60, 34,120,102, 68, 90,240,238,204,210,136,150,180,170,
0, 31, 62, 33,124, 99, 66, 93,248,231,198,217,132,155,186,165,
0, 32, 64, 96,128,160,192,224, 29, 61, 93,125,157,189,221,253,
0, 33, 66, 99,132,165,198,231, 21, 52, 87,118,145,176,211,242,
0, 34, 68,102,136,170,204,238, 13, 47, 73,107,133,167,193,227,
0, 35, 70,101,140,175,202,233, 5, 38, 67, 96,137,170,207,236,
0, 36, 72,108,144,180,216,252, 61, 25,117, 81,173,137,229,193,
0, 37, 74,111,148,177,222,251, 53, 16,127, 90,161,132,235,206,
0, 38, 76,106,152,190,212,242, 45, 11, 97, 71,181,147,249,223,
0, 39, 78,105,156,187,210,245, 37, 2,107, 76,185,158,247,208,
0, 40, 80,120,160,136,240,216, 93,117, 13, 37,253,213,173,133,
0, 41, 82,123,164,141,246,223, 85,124, 7, 46,241,216,163,138,
0, 42, 84,126,168,130,252,214, 77,103, 25, 51,229,207,177,155,
0, 43, 86,125,172,135,250,209, 69,110, 19, 56,233,194,191,148,
0, 44, 88,116,176,156,232,196,125, 81, 37, 9,205,225,149,185,
0, 45, 90,119,180,153,238,195,117, 88, 47, 2,193,236,155,182,
0, 46, 92,114,184,150,228,202,109, 67, 49, 31,213,251,137,167,
0, 47, 94,113,188,147,226,205,101, 74, 59, 20,217,246,135,168,
0, 48, 96, 80,192,240,160,144,157,173,253,205, 93,109, 61, 13,
0, 49, 98, 83,196,245,166,151,149,164,247,198, 81, 96, 51, 2,
0, 50,100, 86,200,250,172,158,141,191,233,219, 69,119, 33, 19,
0, 51,102, 85,204,255,170,153,133,182,227,208, 73,122, 47, 28,
0, 52,104, 92,208,228,184,140,189,137,213,225,109, 89, 5, 49,
0, 53,106, 95,212,225,190,139,181,128,223,234, 97, 84, 11, 62,
0, 54,108, 90,216,238,180,130,173,155,193,247,117, 67, 25, 47,
0, 55,110, 89,220,235,178,133,165,146,203,252,121, 78, 23, 32,
0, 56,112, 72,224,216,144,168,221,229,173,149, 61, 5, 77,117,
0, 57,114, 75,228,221,150,175,213,236,167,158, 49, 8, 67,122,
0, 58,116, 78,232,210,156,166,205,247,185,131, 37, 31, 81,107,
0, 59,118, 77,236,215,154,161,197,254,179,136, 41, 18, 95,100,
0, 60,120, 68,240,204,136,180,253,193,133,185, 13, 49,117, 73,
0, 61,122, 71,244,201,142,179,245,200,143,178, 1, 60,123, 70,
0, 62,124, 66,248,198,132,186,237,211,145,175, 21, 43,105, 87,
0, 63,126, 65,252,195,130,189,229,218,155,164, 25, 38,103, 88,
0, 64,128,192, 29, 93,157,221, 58,122,186,250, 39,103,167,231,
0, 65,130,195, 25, 88,155,218, 50,115,176,241, 43,106,169,232,
0, 66,132,198, 21, 87,145,211, 42,104,174,236, 63,125,187,249,
0, 67,134,197, 17, 82,151,212, 34, 97,164,231, 51,112,181,246,
0, 68,136,204, 13, 73,133,193, 26, 94,146,214, 23, 83,159,219,
0, 69,138,207, 9, 76,131,198, 18, 87,152,221, 27, 94,145,212,
0, 70,140,202, 5, 67,137,207, 10, 76,134,192, 15, 73,131,197,
0, 71,142,201, 1, 70,143,200, 2, 69,140,203, 3, 68,141,202,
0, 72,144,216, 61,117,173,229,122, 50,234,162, 71, 15,215,159,
0, 73,146,219, 57,112,171,226,114, 59,224,169, 75, 2,217,144,
0, 74,148,222, 53,127,161,235,106, 32,254,180, 95, 21,203,129,
0, 75,150,221, 49,122,167,236, 98, 41,244,191, 83, 24,197,142,
0, 76,152,212, 45, 97,181,249, 90, 22,194,142,119, 59,239,163,
0, 77,154,215, 41,100,179,254, 82, 31,200,133,123, 54,225,172,
0, 78,156,210, 37,107,185,247, 74, 4,214,152,111, 33,243,189,
0, 79,158,209, 33,110,191,240, 66, 13,220,147, 99, 44,253,178,
0, 80,160,240, 93, 13,253,173,186,234, 26, 74,231,183, 71, 23,
0, 81,162,243, 89, 8,251,170,178,227, 16, 65,235,186, 73, 24,
0, 82,164,246, 85, 7,241,163,170,248, 14, 92,255,173, 91, 9,
0, 83,166,245, 81, 2,247,164,162,241, 4, 87,243,160, 85, 6,
0, 84,168,252, 77, 25,229,177,154,206, 50,102,215,131,127, 43,
0, 85,170,255, 73, 28,227,182,146,199, 56,109,219,142,113, 36,
0, 86,172,250, 69, 19,233,191,138,220, 38,112,207,153, 99, 53,
0, 87,174,249, 65, 22,239,184,130,213, 44,123,195,148,109, 58,
0, 88,176,232,125, 37,205,149,250,162, 74, 18,135,223, 55,111,
0, 89,178,235,121, 32,203,146,242,171, 64, 25,139,210, 57, 96,
0, 90,180,238,117, 47,193,155,234,176, 94, 4,159,197, 43,113,
0, 91,182,237,113, 42,199,156,226,185, 84, 15,147,200, 37,126,
0, 92,184,228,109, 49,213,137,218,134, 98, 62,183,235, 15, 83,
0, 93,186,231,105, 52,211,142,210,143,104, 53,187,230, 1, 92,
0, 94,188,226,101, 59,217,135,202,148,118, 40,175,241, 19, 77,
0, 95,190,225, 97, 62,223,128,194,157,124, 35,163,252, 29, 66,
0, 96,192,160,157,253, 93, 61, 39, 71,231,135,186,218,122, 26,
0, 97,194,163,153,248, 91, 58, 47, 78,237,140,182,215,116, 21,
0, 98,196,166,149,247, 81, 51, 55, 85,243,145,162,192,102, 4,
0, 99,198,165,145,242, 87, 52, 63, 92,249,154,174,205,104, 11,
0,100,200,172,141,233, 69, 33, 7, 99,207,171,138,238, 66, 38,
0,101,202,175,137,236, 67, 38, 15,106,197,160,134,227, 76, 41,
0,102,204,170,133,227, 73, 47, 23,113,219,189,146,244, 94, 56,
0,103,206,169,129,230, 79, 40, 31,120,209,182,158,249, 80, 55,
0,104,208,184,189,213,109, 5,103, 15,183,223,218,178, 10, 98,
0,105,210,187,185,208,107, 2,111, 6,189,212,214,191, 4,109,
0,106,212,190,181,223, 97, 11,119, 29,163,201,194,168, 22,124,
0,107,214,189,177,218,103, 12,127, 20,169,194,206,165, 24,115,
0,108,216,180,173,193,117, 25, 71, 43,159,243,234,134, 50, 94,
0,109,218,183,169,196,115, 30, 79, 34,149,248,230,139, 60, 81,
0,110,220,178,165,203,121, 23, 87, 57,139,229,242,156, 46, 64,
0,111,222,177,161,206,127, 16, 95, 48,129,238,254,145, 32, 79,
0,112,224,144,221,173, 61, 77,167,215, 71, 55,122, 10,154,234,
0,113,226,147,217,168, 59, 74,175,222, 77, 60,118, 7,148,229,
0,114,228,150,213,167, 49, 67,183,197, 83, 33, 98, 16,134,244,
0,115,230,149,209,162, 55, 68,191,204, 89, 42,110, 29,136,251,
0,116,232,156,205,185, 37, 81,135,243,111, 27, 74, 62,162,214,
0,117,234,159,201,188, 35, 86,143,250,101, 16, 70, 51,172,217,
0,118,236,154,197,179, 41, 95,151,225,123, 13, 82, 36,190,200,
0,119,238,153,193,182, 47, 88,159,232,113, 6, 94, 41,176,199,
0,120,240,136,253,133, 13,117,231,159, 23,111, 26, 98,234,146,
0,121,242,139,249,128, 11,114,239,150, 29,100, 22,111,228,157,
0,122,244,142,245,143, 1,123,247,141, 3,121, 2,120,246,140,
0,123,246,141,241,138, 7,124,255,132, 9,114, 14,117,248,131,
0,124,248,132,237,145, 21,105,199,187, 63, 67, 42, 86,210,174,
0,125,250,135,233,148, 19,110,207,178, 53, 72, 38, 91,220,161,
0,126,252,130,229,155, 25,103,215,169, 43, 85, 50, 76,206,176,
0,127,254,129,225,158, 31, 96,223,160, 33, 94, 62, 65,192,191,
0,128, 29,157, 58,186, 39,167,116,244,105,233, 78,206, 83,211,
0,129, 31,158, 62,191, 33,160,124,253, 99,226, 66,195, 93,220,
0,130, 25,155, 50,176, 43,169,100,230,125,255, 86,212, 79,205,
0,131, 27,152, 54,181, 45,174,108,239,119,244, 90,217, 65,194,
0,132, 21,145, 42,174, 63,187, 84,208, 65,197,126,250,107,239,
0,133, 23,146, 46,171, 57,188, 92,217, 75,206,114,247,101,224,
0,134, 17,151, 34,164, 51,181, 68,194, 85,211,102,224,119,241,
0,135, 19,148, 38,161, 53,178, 76,203, 95,216,106,237,121,254,
0,136, 13,133, 26,146, 23,159, 52,188, 57,177, 46,166, 35,171,
0,137, 15,134, 30,151, 17,152, 60,181, 51,186, 34,171, 45,164,
0,138, 9,131, 18,152, 27,145, 36,174, 45,167, 54,188, 63,181,
0,139, 11,128, 22,157, 29,150, 44,167, 39,172, 58,177, 49,186,
0,140, 5,137, 10,134, 15,131, 20,152, 17,157, 30,146, 27,151,
0,141, 7,138, 14,131, 9,132, 28,145, 27,150, 18,159, 21,152,
0,142, 1,143, 2,140, 3,141, 4,138, 5,139, 6,136, 7,137,
0,143, 3,140, 6,137, 5,138, 12,131, 15,128, 10,133, 9,134,
0,144, 61,173,122,234, 71,215,244,100,201, 89,142, 30,179, 35,
0,145, 63,174,126,239, 65,208,252,109,195, 82,130, 19,189, 44,
0,146, 57,171,114,224, 75,217,228,118,221, 79,150, 4,175, 61,
0,147, 59,168,118,229, 77,222,236,127,215, 68,154, 9,161, 50,
0,148, 53,161,106,254, 95,203,212, 64,225,117,190, 42,139, 31,
0,149, 55,162,110,251, 89,204,220, 73,235,126,178, 39,133, 16,
0,150, 49,167, 98,244, 83,197,196, 82,245, 99,166, 48,151, 1,
0,151, 51,164,102,241, 85,194,204, 91,255,104,170, 61,153, 14,
0,152, 45,181, 90,194,119,239,180, 44,153, 1,238,118,195, 91,
0,153, 47,182, 94,199,113,232,188, 37,147, 10,226,123,205, 84,
0,154, 41,179, 82,200,123,225,164, 62,141, 23,246,108,223, 69,
0,155, 43,176, 86,205,125,230,172, 55,135, 28,250, 97,209, 74,
0,156, 37,185, 74,214,111,243,148, 8,177, 45,222, 66,251,103,
0,157, 39,186, 78,211,105,244,156, 1,187, 38,210, 79,245,104,
0,158, 33,191, 66,220, 99,253,132, 26,165, 59,198, 88,231,121,
0,159, 35,188, 70,217,101,250,140, 19,175, 48,202, 85,233,118,
0,160, 93,253,186, 26,231, 71,105,201, 52,148,211,115,142, 46,
0,161, 95,254,190, 31,225, 64, 97,192, 62,159,223,126,128, 33,
0,162, 89,251,178, 16,235, 73,121,219, 32,130,203,105,146, 48,
0,163, 91,248,182, 21,237, 78,113,210, 42,137,199,100,156, 63,
0,164, 85,241,170, 14,255, 91, 73,237, 28,184,227, 71,182, 18,
0,165, 87,242,174, 11,249, 92, 65,228, 22,179,239, 74,184, 29,
0,166, 81,247,162, 4,243, 85, 89,255, 8,174,251, 93,170, 12,
0,167, 83,244,166, 1,245, 82, 81,246, 2,165,247, 80,164, 3,
0,168, 77,229,154, 50,215,127, 41,129,100,204,179, 27,254, 86,
0,169, 79,230,158, 55,209,120, 33,136,110,199,191, 22,240, 89,
0,170, 73,227,146, 56,219,113, 57,147,112,218,171, 1,226, 72,
0,171, 75,224,150, 61,221,118, 49,154,122,209,167, 12,236, 71,
0,172, 69,233,138, 38,207, 99, 9,165, 76,224,131, 47,198,106,
0,173, 71,234,142, 35,201,100, 1,172, 70,235,143, 34,200,101,
0,174, 65,239,130, 44,195,109, 25,183, 88,246,155, 53,218,116,
0,175, 67,236,134, 41,197,106, 17,190, 82,253,151, 56,212,123,
0,176,125,205,250, 74,135, 55,233, 89,148, 36, 19,163,110,222,
0,177,127,206,254, 79,129, 48,225, 80,158, 47, 31,174, 96,209,
0,178,121,203,242, 64,139, 57,249, 75,128, 50, 11,185,114,192,
0,179,123,200,246, 69,141, 62,241, 66,138, 57, 7,180,124,207,
0,180,117,193,234, 94,159, 43,201,125,188, 8, 35,151, 86,226,
0,181,119,194,238, 91,153, 44,193,116,182, 3, 47,154, 88,237,
0,182,113,199,226, 84,147, 37,217,111,168, 30, 59,141, 74,252,
0,183,115,196,230, 81,149, 34,209,102,162, 21, 55,128, 68,243,
0,184,109,213,218, 98,183, 15,169, 17,196,124,115,203, 30,166,
0,185,111,214,222,103,177, 8,161, 24,206,119,127,198, 16,169,
0,186,105,211,210,104,187, 1,185, 3,208,106,107,209, 2,184,
0,187,107,208,214,109,189, 6,177, 10,218, 97,103,220, 12,183,
0,188,101,217,202,118,175, 19,137, 53,236, 80, 67,255, 38,154,
0,189,103,218,206,115,169, 20,129, 60,230, 91, 79,242, 40,149,
0,190, 97,223,194,124,163, 29,153, 39,248, 70, 91,229, 58,132,
0,191, 99,220,198,121,165, 26,145, 46,242, 77, 87,232, 52,139,
0,192,157, 93, 39,231,186,122, 78,142,211, 19,105,169,244, 52,
0,193,159, 94, 35,226,188,125, 70,135,217, 24,101,164,250, 59,
0,194,153, 91, 47,237,182,116, 94,156,199, 5,113,179,232, 42,
0,195,155, 88, 43,232,176,115, 86,149,205, 14,125,190,230, 37,
0,196,149, 81, 55,243,162,102,110,170,251, 63, 89,157,204, 8,
0,197,151, 82, 51,246,164, 97,102,163,241, 52, 85,144,194, 7,
0,198,145, 87, 63,249,174,104,126,184,239, 41, 65,135,208, 22,
0,199,147, 84, 59,252,168,111,118,177,229, 34, 77,138,222, 25,
0,200,141, 69, 7,207,138, 66, 14,198,131, 75, 9,193,132, 76,
0,201,143, 70, 3,202,140, 69, 6,207,137, 64, 5,204,138, 67,
0,202,137, 67, 15,197,134, 76, 30,212,151, 93, 17,219,152, 82,
0,203,139, 64, 11,192,128, 75, 22,221,157, 86, 29,214,150, 93,
0,204,133, 73, 23,219,146, 94, 46,226,171,103, 57,245,188,112,
0,205,135, 74, 19,222,148, 89, 38,235,161,108, 53,248,178,127,
0,206,129, 79, 31,209,158, 80, 62,240,191,113, 33,239,160,110,
0,207,131, 76, 27,212,152, 87, 54,249,181,122, 45,226,174, 97,
0,208,189,109,103,183,218, 10,206, 30,115,163,169,121, 20,196,
0,209,191,110, 99,178,220, 13,198, 23,121,168,165,116, 26,203,
0,210,185,107,111,189,214, 4,222, 12,103,181,177, 99, 8,218,
0,211,187,104,107,184,208, 3,214, 5,109,190,189,110, 6,213,
0,212,181, 97,119,163,194, 22,238, 58, 91,143,153, 77, 44,248,
0,213,183, 98,115,166,196, 17,230, 51, 81,132,149, 64, 34,247,
0,214,177,103,127,169,206, 24,254, 40, 79,153,129, 87, 48,230,
0,215,179,100,123,172,200, 31,246, 33, 69,146,141, 90, 62,233,
0,216,173,117, 71,159,234, 50,142, 86, 35,251,201, 17,100,188,
0,217,175,118, 67,154,236, 53,134, 95, 41,240,197, 28,106,179,
0,218,169,115, 79,149,230, 60,158, 68, 55,237,209, 11,120,162,
0,219,171,112, 75,144,224, 59,150, 77, 61,230,221, 6,118,173,
0,220,165,121, 87,139,242, 46,174,114, 11,215,249, 37, 92,128,
0,221,167,122, 83,142,244, 41,166,123, 1,220,245, 40, 82,143,
0,222,161,127, 95,129,254, 32,190, 96, 31,193,225, 63, 64,158,
0,223,163,124, 91,132,248, 39,182,105, 21,202,237, 50, 78,145,
0,224,221, 61,167, 71,122,154, 83,179,142,110,244, 20, 41,201,
0,225,223, 62,163, 66,124,157, 91,186,132,101,248, 25, 39,198,
0,226,217, 59,175, 77,118,148, 67,161,154,120,236, 14, 53,215,
0,227,219, 56,171, 72,112,147, 75,168,144,115,224, 3, 59,216,
0,228,213, 49,183, 83, 98,134,115,151,166, 66,196, 32, 17,245,
0,229,215, 50,179, 86,100,129,123,158,172, 73,200, 45, 31,250,
0,230,209, 55,191, 89,110,136, 99,133,178, 84,220, 58, 13,235,
0,231,211, 52,187, 92,104,143,107,140,184, 95,208, 55, 3,228,
0,232,205, 37,135,111, 74,162, 19,251,222, 54,148,124, 89,177,
0,233,207, 38,131,106, 76,165, 27,242,212, 61,152,113, 87,190,
0,234,201, 35,143,101, 70,172, 3,233,202, 32,140,102, 69,175,
0,235,203, 32,139, 96, 64,171, 11,224,192, 43,128,107, 75,160,
0,236,197, 41,151,123, 82,190, 51,223,246, 26,164, 72, 97,141,
0,237,199, 42,147,126, 84,185, 59,214,252, 17,168, 69,111,130,
0,238,193, 47,159,113, 94,176, 35,205,226, 12,188, 82,125,147,
0,239,195, 44,155,116, 88,183, 43,196,232, 7,176, 95,115,156,
0,240,253, 13,231, 23, 26,234,211, 35, 46,222, 52,196,201, 57,
0,241,255, 14,227, 18, 28,237,219, 42, 36,213, 56,201,199, 54,
0,242,249, 11,239, 29, 22,228,195, 49, 58,200, 44,222,213, 39,
0,243,251, 8,235, 24, 16,227,203, 56, 48,195, 32,211,219, 40,
0,244,245, 1,247, 3, 2,246,243, 7, 6,242, 4,240,241, 5,
0,245,247, 2,243, 6, 4,241,251, 14, 12,249, 8,253,255, 10,
0,246,241, 7,255, 9, 14,248,227, 21, 18,228, 28,234,237, 27,
0,247,243, 4,251, 12, 8,255,235, 28, 24,239, 16,231,227, 20,
0,248,237, 21,199, 63, 42,210,147,107,126,134, 84,172,185, 65,
0,249,239, 22,195, 58, 44,213,155, 98,116,141, 88,161,183, 78,
0,250,233, 19,207, 53, 38,220,131,121,106,144, 76,182,165, 95,
0,251,235, 16,203, 48, 32,219,139,112, 96,155, 64,187,171, 80,
0,252,229, 25,215, 43, 50,206,179, 79, 86,170,100,152,129,125,
0,253,231, 26,211, 46, 52,201,187, 70, 92,161,104,149,143,114,
0,254,225, 31,223, 33, 62,192,163, 93, 66,188,124,130,157, 99,
0,255,227, 28,219, 36, 56,199,171, 84, 72,183,112,143,147,108,
};
static const uint8_t GF2_8_SHUF_HI[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 16, 32, 48, 64, 80, 96,112,128,144,160,176,192,208,224,240,
0, 32, 64, 96,128,160,192,224, 29, 61, 93,125,157,189,221,253,
0, 48, 96, 80,192,240,160,144,157,173,253,205, 93,109, 61, 13,
0, 64,128,192, 29, 93,157,221, 58,122,186,250, 39,103,167,231,
0, 80,160,240, 93, 13,253,173,186,234, 26, 74,231,183, 71, 23,
0, 96,192,160,157,253, 93, 61, 39, 71,231,135,186,218,122, 26,
0,112,224,144,221,173, 61, 77,167,215, 71, 55,122, 10,154,234,
0,128, 29,157, 58,186, 39,167,116,244,105,233, 78,206, 83,211,
0,144, 61,173,122,234, 71,215,244,100,201, 89,142, 30,179, 35,
0,160, 93,253,186, 26,231, 71,105,201, 52,148,211,115,142, 46,
0,176,125,205,250, 74,135, 55,233, 89,148, 36, 19,163,110,222,
0,192,157, 93, 39,231,186,122, 78,142,211, 19,105,169,244, 52,
0,208,189,109,103,183,218, 10,206, 30,115,163,169,121, 20,196,
0,224,221, 61,167, 71,122,154, 83,179,142,110,244, 20, 41,201,
0,240,253, 13,231, 23, 26,234,211, 35, 46,222, 52,196,201, 57,
0, 29, 58, 39,116,105, 78, 83,232,245,210,207,156,129,166,187,
0, 13, 26, 23, 52, 57, 46, 35,104,101,114,127, 92, 81, 70, 75,
0, 61,122, 71,244,201,142,179,245,200,143,178, 1, 60,123, 70,
0, 45, 90,119,180,153,238,195,117, 88, 47, 2,193,236,155,182,
0, 93,186,231,105, 52,211,142,210,143,104, 53,187,230, 1, 92,
0, 77,154,215, 41,100,179,254, 82, 31,200,133,123, 54,225,172,
0,125,250,135,233,148, 19,110,207,178, 53, 72, 38, 91,220,161,
0,109,218,183,169,196,115, 30, 79, 34,149,248,230,139, 60, 81,
0,157, 39,186, 78,211,105,244,156, 1,187, 38,210, 79,245,104,
0,141, 7,138, 14,131, 9,132, 28,145, 27,150, 18,159, 21,152,
0,189,103,218,206,115,169, 20,129, 60,230, 91, 79,242, 40,149,
0,173, 71,234,142, 35,201,100, 1,172, 70,235,143, 34,200,101,
0,221,167,122, 83,142,244, 41,166,123, 1,220,245, 40, 82,143,
0,205,135, 74, 19,222,148, 89, 38,235,161,108, 53,248,178,127,
0,253,231, 26,211, 46, 52,201,187, 70, 92,161,104,149,143,114,
0,237,199, 42,147,126, 84,185, 59,214,252, 17,168, 69,111,130,
0, 58,116, 78,232,210,156,166,205,247,185,131, 37, 31, 81,107,
0, 42, 84,126,168,130,252,214, 77,103, 25, 51,229,207,177,155,
0, 26, 52, 46,104,114, 92, 70,208,202,228,254,184,162,140,150,
0, 10, 20, 30, 40, 34, 60, 54, 80, 90, 68, 78,120,114,108,102,
0,122,244,142,245,143, 1,123,247,141, 3,121, 2,120,246,140,
0,106,212,190,181,223, 97, 11,119, 29,163,201,194,168, 22,124,
0, 90,180,238,117, 47,193,155,234,176, 94, 4,159,197, 43,113,
0, 74,148,222, 53,127,161,235,106, 32,254,180, 95, 21,203,129,
0,186,105,211,210,104,187, 1,185, 3,208,106,107,209, 2,184,
0,170, 73,227,146, 56,219,113, 57,147,112,218,171, 1,226, 72,
0,154, 41,179, 82,200,123,225,164, 62,141, 23,246,108,223, 69,
0,138, 9,131, 18,152, 27,145, 36,174, 45,167, 54,188, 63,181,
0,250,233, 19,207, 53, 38,220,131,121,106,144, 76,182,165, 95,
0,234,201, 35,143,101, 70,172, 3,233,202, 32,140,102, 69,175,
0,218,169,115, 79,149,230, 60,158, 68, 55,237,209, 11,120,162,
0,202,137, 67, 15,197,134, 76, 30,212,151, 93, 17,219,152, 82,
0, 39, 78,105,156,187,210,245, 37, 2,107, 76,185,158,247,208,
0, 55,110, 89,220,235,178,133,165,146,203,252,121, 78, 23, 32,
0, 7, 14, 9, 28, 27, 18, 21, 56, 63, 54, 49, 36, 35, 42, 45,
0, 23, 46, 57, 92, 75,114,101,184,175,150,129,228,243,202,221,
0,103,206,169,129,230, 79, 40, 31,120,209,182,158,249, 80, 55,
0,119,238,153,193,182, 47, 88,159,232,113, 6, 94, 41,176,199,
0, 71,142,201, 1, 70,143,200, 2, 69,140,203, 3, 68,141,202,
0, 87,174,249, 65, 22,239,184,130,213, 44,123,195,148,109, 58,
0,167, 83,244,166, 1,245, 82, 81,246, 2,165,247, 80,164, 3,
0,183,115,196,230, 81,149, 34,209,102,162, 21, 55,128, 68,243,
0,135, 19,148, 38,161, 53,178, 76,203, 95,216,106,237,121,254,
0,151, 51,164,102,241, 85,194,204, 91,255,104,170, 61,153, 14,
0,231,211, 52,187, 92,104,143,107,140,184, 95,208, 55, 3,228,
0,247,243, 4,251, 12, 8,255,235, 28, 24,239, 16,231,227, 20,
0,199,147, 84, 59,252,168,111,118,177,229, 34, 77,138,222, 25,
0,215,179,100,123,172,200, 31,246, 33, 69,146,141, 90, 62,233,
0,116,232,156,205,185, 37, 81,135,243,111, 27, 74, 62,162,214,
0,100,200,172,141,233, 69, 33, 7, 99,207,171,138,238, 66, 38,
0, 84,168,252, 77, 25,229,177,154,206, 50,102,215,131,127, 43,
0, 68,136,204, 13, 73,133,193, 26, 94,146,214, 23, 83,159,219,
0, 52,104, 92,208,228,184,140,189,137,213,225,109, 89, 5, 49,
0, 36, 72,108,144,180,216,252, 61, 25,117, 81,173,137,229,193,
0, 20, 40, 60, 80, 68,120,108,160,180,136,156,240,228,216,204,
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
0,244,245, 1,247, 3, 2,246,243, 7, 6,242, 4,240,241, 5,
0,228,213, 49,183, 83, 98,134,115,151,166, 66,196, 32, 17,245,
0,212,181, 97,119,163,194, 22,238, 58, 91,143,153, 77, 44,248,
0,196,149, 81, 55,243,162,102,110,170,251, 63, 89,157,204, 8,
0,180,117,193,234, 94,159, 43,201,125,188, 8, 35,151, 86,226,
0,164, 85,241,170, 14,255, 91, 73,237, 28,184,227, 71,182, 18,
0,148, 53,161,106,254, 95,203,212, 64,225,117,190, 42,139, 31,
0,132, 21,145, 42,174, 63,187, 84,208, 65,197,126,250,107,239,
0,105,210,187,185,208,107, 2,111, 6,189,212,214,191, 4,109,
0,121,242,139,249,128, 11,114,239,150, 29,100, 22,111,228,157,
0, 73,146,219, 57,112,171,226,114, 59,224,169, 75, 2,217,144,
0, 89,178,235,121, 32,203,146,242,171, 64, 25,139,210, 57, 96,
0, 41, 82,123,164,141,246,223, 85,124, 7, 46,241,216,163,138,
0, 57,114, 75,228,221,150,175,213,236,167,158, 49, 8, 67,122,
0, 9, 18, 27, 36, 45, 54, 63, 72, 65, 90, 83,108,101,126,119,
0, 25, 50, 43,100,125, 86, 79,200,209,250,227,172,181,158,135,
0,233,207, 38,131,106, 76,165, 27,242,212, 61,152,113, 87,190,
0,249,239, 22,195, 58, 44,213,155, 98,116,141, 88,161,183, 78,
0,201,143, 70, 3,202,140, 69, 6,207,137, 64, 5,204,138, 67,
0,217,175,118, 67,154,236, 53,134, 95, 41,240,197, 28,106,179,
0,169, 79,230,158, 55,209,120, 33,136,110,199,191, 22,240, 89,
0,185,111,214,222,103,177, 8,161, 24,206,119,127,198, 16,169,
0,137, 15,134, 30,151, 17,152, 60,181, 51,186, 34,171, 45,164,
0,153, 47,182, 94,199,113,232,188, 37,147, 10,226,123,205, 84,
0, 78,156,210, 37,107,185,247, 74, 4,214,152,111, 33,243,189,
0, 94,188,226,101, 59,217,135,202,148,118, 40,175,241, 19, 77,
0,110,220,178,165,203,121, 23, 87, 57,139,229,242,156, 46, 64,
0,126,252,130,229,155, 25,103,215,169, 43, 85, 50, 76,206,176,
0, 14, 28, 18, 56, 54, 36, 42,112,126,108, 98, 72, 70, 84, 90,
0, 30, 60, 34,120,102, 68, 90,240,238,204,210,136,150,180,170,
0, 46, 92,114,184,150,228,202,109, 67, 49, 31,213,251,137,167,
0, 62,124, 66,248,198,132,186,237,211,145,175, 21, 43,105, 87,
0,206,129, 79, 31,209,158, 80, 62,240,191,113, 33,239,160,110,
0,222,161,127, 95,129,254, 32,190, 96, 31,193,225, 63, 64,158,
0,238,193, 47,159,113, 94,176, 35,205,226, 12,188, 82,125,147,
0,254,225, 31,223, 33, 62,192,163, 93, 66,188,124,130,157, 99,
0,142, 1,143, 2,140, 3,141, 4,138, 5,139, 6,136, 7,137,
0,158, 33,191, 66,220, 99,253,132, 26,165, 59,198, 88,231,121,
0,174, 65,239,130, 44,195,109, 25,183, 88,246,155, 53,218,116,
0,190, 97,223,194,124,163, 29,153, 39,248, 70, 91,229, 58,132,
0, 83,166,245, 81, 2,247,164,162,241, 4, 87,243,160, 85, 6,
0, 67,134,197, 17, 82,151,212, 34, 97,164,231, 51,112,181,246,
0,115,230,149,209,162, 55, 68,191,204, 89, 42,110, 29,136,251,
0, 99,198,165,145,242, 87, 52, 63, 92,249,154,174,205,104, 11,
0, 19, 38, 53, 76, 95,106,121,152,139,190,173,212,199,242,225,
0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17,
0, 51,102, 85,204,255,170,153,133,182,227,208, 73,122, 47, 28,
0, 35, 70,101,140,175,202,233, 5, 38, 67, 96,137,170,207,236,
0,211,187,104,107,184,208, 3,214, 5,109,190,189,110, 6,213,
0,195,155, 88, 43,232,176,115, 86,149,205, 14,125,190,230, 37,
0,243,251, 8,235, 24, 16,227,203, 56, 48,195, 32,211,219, 40,
0,227,219, 56,171, 72,112,147, 75,168,144,115,224, 3, 59,216,
0,147, 59,168,118,229, 77,222,236,127,215, 68,154, 9,161, 50,
0,131, 27,152, 54,181, 45,174,108,239,119,244, 90,217, 65,194,
0,179,123,200,246, 69,141, 62,241, 66,138, 57, 7,180,124,207,
0,163, 91,248,182, 21,237, 78,113,210, 42,137,199,100,156, 63,
0,232,205, 37,135,111, 74,162, 19,251,222, 54,148,124, 89,177,
0,248,237, 21,199, 63, 42,210,147,107,126,134, 84,172,185, 65,
0,200,141, 69, 7,207,138, 66, 14,198,131, 75, 9,193,132, 76,
0,216,173,117, 71,159,234, 50,142, 86, 35,251,201, 17,100,188,
0,168, 77,229,154, 50,215,127, 41,129,100,204,179, 27,254, 86,
0,184,109,213,218, 98,183, 15,169, 17,196,124,115,203, 30,166,
0,136, 13,133, 26,146, 23,159, 52,188, 57,177, 46,166, 35,171,
0,152, 45,181, 90,194,119,239,180, 44,153, 1,238,118,195, 91,
0,104,208,184,189,213,109, 5,103, 15,183,223,218,178, 10, 98,
0,120,240,136,253,133, 13,117,231,159, 23,111, 26, 98,234,146,
0, 72,144,216, 61,117,173,229,122, 50,234,162, 71, 15,215,159,
0, 88,176,232,125, 37,205,149,250,162, 74, 18,135,223, 55,111,
0, 40, 80,120,160,136,240,216, 93,117, 13, 37,253,213,173,133,
0, 56,112, 72,224,216,144,168,221,229,173,149, 61, 5, 77,117,
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96,104,112,120,
0, 24, 48, 40, 96,120, 80, 72,192,216,240,232,160,184,144,136,
0,245,247, 2,243, 6, 4,241,251, 14, 12,249, 8,253,255, 10,
0,229,215, 50,179, 86,100,129,123,158,172, 73,200, 45, 31,250,
0,213,183, 98,115,166,196, 17,230, 51, 81,132,149, 64, 34,247,
0,197,151, 82, 51,246,164, 97,102,163,241, 52, 85,144,194, 7,
0,181,119,194,238, 91,153, 44,193,116,182, 3, 47,154, 88,237,
0,165, 87,242,174, 11,249, 92, 65,228, 22,179,239, 74,184, 29,
0,149, 55,162,110,251, 89,204,220, 73,235,126,178, 39,133, 16,
0,133, 23,146, 46,171, 57,188, 92,217, 75,206,114,247,101,224,
0,117,234,159,201,188, 35, 86,143,250,101, 16, 70, 51,172,217,
0,101,202,175,137,236, 67, 38, 15,106,197,160,134,227, 76, 41,
0, 85,170,255, 73, 28,227,182,146,199, 56,109,219,142,113, 36,
0, 69,138,207, 9, 76,131,198, 18, 87,152,221, 27, 94,145,212,
0, 53,106, 95,212,225,190,139,181,128,223,234, 97, 84, 11, 62,
0, 37, 74,111,148,177,222,251, 53, 16,127, 90,161,132,235,206,
0, 21, 42, 63, 84, 65,126,107,168,189,130,151,252,233,214,195,
0, 5, 10, 15, 20, 17, 30, 27, 40, 45, 34, 39, 60, 57, 54, 51,
0,210,185,107,111,189,214, 4,222, 12,103,181,177, 99, 8,218,
0,194,153, 91, 47,237,182,116, 94,156,199, 5,113,179,232, 42,
0,242,249, 11,239, 29, 22,228,195, 49, 58,200, 44,222,213, 39,
0,226,217, 59,175, 77,118,148, 67,161,154,120,236, 14, 53,215,
0,146, 57,171,114,224, 75,217,228,118,221, 79,150, 4,175, 61,
0,130, 25,155, 50,176, 43,169,100,230,125,255, 86,212, 79,205,
0,178,121,203,242, 64,139, 57,249, 75,128, 50, 11,185,114,192,
0,162, 89,251,178, 16,235, 73,121,219, 32,130,203,105,146, 48,
0, 82,164,246, 85, 7,241,163,170,248, 14, 92,255,173, 91, 9,
0, 66,132,198, 21, 87,145,211, 42,104,174,236, 63,125,187,249,
0,114,228,150,213,167, 49, 67,183,197, 83, 33, 98, 16,134,244,
0, 98,196,166,149,247, 81, 51, 55, 85,243,145,162,192,102, 4,
0, 18, 36, 54, 72, 90,108,126,144,130,180,166,216,202,252,238,
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
0, 50,100, 86,200,250,172,158,141,191,233,219, 69,119, 33, 19,
0, 34, 68,102,136,170,204,238, 13, 47, 73,107,133,167,193,227,
0,207,131, 76, 27,212,152, 87, 54,249,181,122, 45,226,174, 97,
0,223,163,124, 91,132,248, 39,182,105, 21,202,237, 50, 78,145,
0,239,195, 44,155,116, 88,183, 43,196,232, 7,176, 95,115,156,
0,255,227, 28,219, 36, 56,199,171, 84, 72,183,112,143,147,108,
0,143, 3,140, 6,137, 5,138, 12,131, 15,128, 10,133, 9,134,
0,159, 35,188, 70,217,101,250,140, 19,175, 48,202, 85,233,118,
0,175, 67,236,134, 41,197,106, 17,190, 82,253,151, 56,212,123,
0,191, 99,220,198,121,165, 26,145, 46,242, 77, 87,232, 52,139,
0, 79,158,209, 33,110,191,240, 66, 13,220,147, 99, 44,253,178,
0, 95,190,225, 97, 62,223,128,194,157,124, 35,163,252, 29, 66,
0,111,222,177,161,206,127, 16, 95, 48,129,238,254,145, 32, 79,
0,127,254,129,225,158, 31, 96,223,160, 33, 94, 62, 65,192,191,
0, 15, 30, 17, 60, 51, 34, 45,120,119,102,105, 68, 75, 90, 85,
0, 31, 62, 33,124, 99, 66, 93,248,231,198,217,132,155,186,165,
0, 47, 94,113,188,147,226,205,101, 74, 59, 20,217,246,135,168,
0, 63,126, 65,252,195,130,189,229,218,155,164, 25, 38,103, 88,
0,156, 37,185, 74,214,111,243,148, 8,177, 45,222, 66,251,103,
0,140, 5,137, 10,134, 15,131, 20,152, 17,157, 30,146, 27,151,
0,188,101,217,202,118,175, 19,137, 53,236, 80, 67,255, 38,154,
0,172, 69,233,138, 38,207, 99, 9,165, 76,224,131, 47,198,106,
0,220,165,121, 87,139,242, 46,174,114, 11,215,249, 37, 92,128,
0,204,133, 73, 23,219,146, 94, 46,226,171,103, 57,245,188,112,
0,252,229, 25,215, 43, 50,206,179, 79, 86,170,100,152,129,125,
0,236,197, 41,151,123, 82,190, 51,223,246, 26,164, 72, 97,141,
0, 28, 56, 36,112,108, 72, 84,224,252,216,196,144,140,168,180,
0, 12, 24, 20, 48, 60, 40, 36, 96,108,120,116, 80, 92, 72, 68,
0, 60,120, 68,240,204,136,180,253,193,133,185, 13, 49,117, 73,
0, 44, 88,116,176,156,232,196,125, 81, 37, 9,205,225,149,185,
0, 92,184,228,109, 49,213,137,218,134, 98, 62,183,235, 15, 83,
0, 76,152,212, 45, 97,181,249, 90, 22,194,142,119, 59,239,163,
0,124,248,132,237,145, 21,105,199,187, 63, 67, 42, 86,210,174,
0,108,216,180,173,193,117, 25, 71, 43,159,243,234,134, 50, 94,
0,129, 31,158, 62,191, 33,160,124,253, 99,226, 66,195, 93,220,
0,145, 63,174,126,239, 65,208,252,109,195, 82,130, 19,189, 44,
0,161, 95,254,190, 31,225, 64, 97,192, 62,159,223,126,128, 33,
0,177,127,206,254, 79,129, 48,225, 80,158, 47, 31,174, 96,209,
0,193,159, 94, 35,226,188,125, 70,135,217, 24,101,164,250, 59,
0,209,191,110, 99,178,220, 13,198, 23,121,168,165,116, 26,203,
0,225,223, 62,163, 66,124,157, 91,186,132,101,248, 25, 39,198,
0,241,255, 14,227, 18, 28,237,219, 42, 36,213, 56,201,199, 54,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 17, 34, 51, 68, 85,102,119,136,153,170,187,204,221,238,255,
0, 33, 66, 99,132,165,198,231, 21, 52, 87,118,145,176,211,242,
0, 49, 98, 83,196,245,166,151,149,164,247,198, 81, 96, 51, 2,
0, 65,130,195, 25, 88,155,218, 50,115,176,241, 43,106,169,232,
0, 81,162,243, 89, 8,251,170,178,227, 16, 65,235,186, 73, 24,
0, 97,194,163,153,248, 91, 58, 47, 78,237,140,182,215,116, 21,
0,113,226,147,217,168, 59, 74,175,222, 77, 60,118, 7,148,229,
0,166, 81,247,162, 4,243, 85, 89,255, 8,174,251, 93,170, 12,
0,182,113,199,226, 84,147, 37,217,111,168, 30, 59,141, 74,252,
0,134, 17,151, 34,164, 51,181, 68,194, 85,211,102,224,119,241,
0,150, 49,167, 98,244, 83,197,196, 82,245, 99,166, 48,151, 1,
0,230,209, 55,191, 89,110,136, 99,133,178, 84,220, 58, 13,235,
0,246,241, 7,255, 9, 14,248,227, 21, 18,228, 28,234,237, 27,
0,198,145, 87, 63,249,174,104,126,184,239, 41, 65,135,208, 22,
0,214,177,103,127,169,206, 24,254, 40, 79,153,129, 87, 48,230,
0, 38, 76,106,152,190,212,242, 45, 11, 97, 71,181,147,249,223,
0, 54,108, 90,216,238,180,130,173,155,193,247,117, 67, 25, 47,
0, 6, 12, 10, 24, 30, 20, 18, 48, 54, 60, 58, 40, 46, 36, 34,
0, 22, 44, 58, 88, 78,116, 98,176,166,156,138,232,254,196,210,
0,102,204,170,133,227, 73, 47, 23,113,219,189,146,244, 94, 56,
0,118,236,154,197,179, 41, 95,151,225,123, 13, 82, 36,190,200,
0, 70,140,202, 5, 67,137,207, 10, 76,134,192, 15, 73,131,197,
0, 86,172,250, 69, 19,233,191,138,220, 38,112,207,153, 99, 53,
0,187,107,208,214,109,189, 6,177, 10,218, 97,103,220, 12,183,
0,171, 75,224,150, 61,221,118, 49,154,122,209,167, 12,236, 71,
0,155, 43,176, 86,205,125,230,172, 55,135, 28,250, 97,209, 74,
0,139, 11,128, 22,157, 29,150, 44,167, 39,172, 58,177, 49,186,
0,251,235, 16,203, 48, 32,219,139,112, 96,155, 64,187,171, 80,
0,235,203, 32,139, 96, 64,171, 11,224,192, 43,128,107, 75,160,
0,219,171,112, 75,144,224, 59,150, 77, 61,230,221, 6,118,173,
0,203,139, 64, 11,192,128, 75, 22,221,157, 86, 29,214,150, 93,
0, 59,118, 77,236,215,154,161,197,254,179,136, 41, 18, 95,100,
0, 43, 86,125,172,135,250,209, 69,110, 19, 56,233,194,191,148,
0, 27, 54, 45,108,119, 90, 65,216,195,238,245,180,175,130,153,
0, 11, 22, 29, 44, 39, 58, 49, 88, 83, 78, 69,116,127, 98,105,
0,123,246,141,241,138, 7,124,255,132, 9,114, 14,117,248,131,
0,107,214,189,177,218,103, 12,127, 20,169,194,206,165, 24,115,
0, 91,182,237,113, 42,199,156,226,185, 84, 15,147,200, 37,126,
0, 75,150,221, 49,122,167,236, 98, 41,244,191, 83, 24,197,142,
};
/* clang-format on */
#endif

View File

@@ -0,0 +1,285 @@
// Note about the frequent use of #undef before most defines in this file: These exist to
// prevent a lot of (harmless) redefined macro warnings if this file is re-included multiple
// times in the same build context. Sunshine and Moonlight use this trick to bundle all arch
// builds into the same binary along with runtime detection. An example can be found at
// https://github.com/LizardByte/Sunshine/blob/master/src/rswrapper.c
#include "oblas_lite.h"
#if defined(OBLAS_TINY)
static inline uint8_t gf2_8_mul(uint16_t a, uint16_t b)
{
if (!a || !b) {
return 0;
}
// Perform 8-bit, carry-less multiplication of |a| and |b|.
return GF2_8_EXP[GF2_8_LOG[a] + GF2_8_LOG[b]];
}
static void obl_axpy_ref(u8 *a, u8 *b, u8 u, unsigned k)
{
register u8 *ap = a, *ae = &a[k], *bp = b;
for (; ap != ae; ap++, bp++)
*ap ^= gf2_8_mul(u, *bp);
}
static void obl_scal_ref(u8 *a, u8 *b, u8 u, unsigned k)
{
(void)b;
register u8 *ap = a, *ae = &a[k];
for (; ap != ae; ap++)
*ap = gf2_8_mul(u, *ap);
}
#else
#include "gf2_8_mul_table.h"
static void obl_axpy_ref(u8 *a, u8 *b, u8 u, unsigned k)
{
register const u8 *u_row = &GF2_8_MUL[u << 8];
register u8 *ap = a, *ae = &a[k], *bp = b;
for (; ap != ae; ap++, bp++)
*ap ^= u_row[*bp];
}
static void obl_scal_ref(u8 *a, u8 *b, u8 u, unsigned k)
{
(void)b;
register const u8 *u_row = &GF2_8_MUL[u << 8];
register u8 *ap = a, *ae = &a[k];
for (; ap != ae; ap++)
*ap = u_row[*ap];
}
#endif
void obl_axpyb32_ref(u8 *a, u32 *b, u8 u, unsigned k)
{
for (unsigned idx = 0, p = 0; idx < k; idx += 8 * sizeof(u32), p++) {
u32 tmp = b[p];
while (tmp > 0) {
#ifdef _MSC_VER
unsigned long index = 0;
_BitScanForward(&index, tmp);
unsigned tz = (unsigned int)index;
#else
unsigned tz = __builtin_ctz(tmp);
#endif
tmp = tmp & (tmp - 1);
a[tz + idx] ^= u;
}
}
}
#if defined(OBLAS_AVX512)
#include <immintrin.h>
#undef OBLAS_ALIGN
#define OBLAS_ALIGN 64
#undef OBL_SHUF
#define OBL_SHUF(op, a, b, f) \
do { \
const u8 *u_lo = GF2_8_SHUF_LO + u * 16; \
const u8 *u_hi = GF2_8_SHUF_HI + u * 16; \
const __m512i mask = _mm512_set1_epi8(0x0f); \
const __m128i ulo_128 = _mm_loadu_si128((__m128i *)u_lo); \
const __m128i uhi_128 = _mm_loadu_si128((__m128i *)u_hi); \
const __m512i urow_lo = _mm512_broadcast_i32x4(ulo_128); \
const __m512i urow_hi = _mm512_broadcast_i32x4(uhi_128); \
__m512i *ap = (__m512i *)a, *ae = (__m512i *)(a + k - (k % sizeof(__m512i))), *bp = (__m512i *)b; \
for (; ap < ae; ap++, bp++) { \
__m512i bx = _mm512_loadu_si512(bp); \
__m512i lo = _mm512_and_si512(bx, mask); \
bx = _mm512_srli_epi64(bx, 4); \
__m512i hi = _mm512_and_si512(bx, mask); \
lo = _mm512_shuffle_epi8(urow_lo, lo); \
hi = _mm512_shuffle_epi8(urow_hi, hi); \
_mm512_storeu_si512(ap, f(_mm512_loadu_si512(ap), _mm512_xor_si512(lo, hi))); \
} \
op##_ref((u8 *)ap, (u8 *)bp, u, k % sizeof(__m512i)); \
} while (0)
#undef OBL_SHUF_XOR
#define OBL_SHUF_XOR _mm512_xor_si512
#undef OBL_AXPYB32
#define OBL_AXPYB32(a, b, u, k) \
do { \
__m512i *ap = (__m512i *)a, *ae = (__m512i *)(a + k); \
__m512i scatter = \
_mm512_set_epi32(0x03030303, 0x03030303, 0x02020202, 0x02020202, 0x01010101, 0x01010101, 0x00000000, 0x00000000, \
0x03030303, 0x03030303, 0x02020202, 0x02020202, 0x01010101, 0x01010101, 0x00000000, 0x00000000); \
__m512i cmpmask = \
_mm512_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, \
0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201); \
__m512i up = _mm512_set1_epi8(u); \
for (unsigned p = 0; ap < ae; p++, ap++) { \
__m512i bcast = _mm512_set1_epi32(b[p]); \
__m512i ret = _mm512_shuffle_epi8(bcast, scatter); \
ret = _mm512_andnot_si512(ret, cmpmask); \
__mmask64 tmp = _mm512_cmpeq_epi8_mask(ret, _mm512_setzero_si512()); \
ret = _mm512_mask_blend_epi8(tmp, _mm512_setzero_si512(), up); \
_mm512_storeu_si512(ap, _mm512_xor_si512(_mm512_loadu_si512(ap), ret)); \
} \
} while (0)
#else
#if defined(OBLAS_AVX2)
#include <immintrin.h>
#undef OBLAS_ALIGN
#define OBLAS_ALIGN 32
#undef OBL_SHUF
#define OBL_SHUF(op, a, b, f) \
do { \
const u8 *u_lo = GF2_8_SHUF_LO + u * 16; \
const u8 *u_hi = GF2_8_SHUF_HI + u * 16; \
const __m256i mask = _mm256_set1_epi8(0x0f); \
const __m256i urow_lo = _mm256_loadu2_m128i((__m128i *)u_lo, (__m128i *)u_lo); \
const __m256i urow_hi = _mm256_loadu2_m128i((__m128i *)u_hi, (__m128i *)u_hi); \
__m256i *ap = (__m256i *)a, *ae = (__m256i *)(a + k - (k % sizeof(__m256i))), *bp = (__m256i *)b; \
for (; ap < ae; ap++, bp++) { \
__m256i bx = _mm256_loadu_si256(bp); \
__m256i lo = _mm256_and_si256(bx, mask); \
bx = _mm256_srli_epi64(bx, 4); \
__m256i hi = _mm256_and_si256(bx, mask); \
lo = _mm256_shuffle_epi8(urow_lo, lo); \
hi = _mm256_shuffle_epi8(urow_hi, hi); \
_mm256_storeu_si256(ap, f(_mm256_loadu_si256(ap), _mm256_xor_si256(lo, hi))); \
} \
op##_ref((u8 *)ap, (u8 *)bp, u, k % sizeof(__m256i)); \
} while (0)
#undef OBL_SHUF_XOR
#define OBL_SHUF_XOR _mm256_xor_si256
#undef OBL_AXPYB32
#define OBL_AXPYB32(a, b, u, k) \
do { \
__m256i *ap = (__m256i *)a, *ae = (__m256i *)(a + k); \
__m256i scatter = \
_mm256_set_epi32(0x03030303, 0x03030303, 0x02020202, 0x02020202, 0x01010101, 0x01010101, 0x00000000, 0x00000000); \
__m256i cmpmask = \
_mm256_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201); \
__m256i up = _mm256_set1_epi8(u); \
for (unsigned p = 0; ap < ae; p++, ap++) { \
__m256i bcast = _mm256_set1_epi32(b[p]); \
__m256i ret = _mm256_shuffle_epi8(bcast, scatter); \
ret = _mm256_andnot_si256(ret, cmpmask); \
ret = _mm256_and_si256(_mm256_cmpeq_epi8(ret, _mm256_setzero_si256()), up); \
_mm256_storeu_si256(ap, _mm256_xor_si256(_mm256_loadu_si256(ap), ret)); \
} \
} while (0)
#else
#if defined(OBLAS_SSE3) || defined(OBLAS_NEON)
#if defined(OBLAS_NEON)
#include "sse2neon/sse2neon.h"
#else
#include <emmintrin.h>
#include <tmmintrin.h>
#endif
#undef OBLAS_ALIGN
#define OBLAS_ALIGN 16
#undef OBL_SHUF
#define OBL_SHUF(op, a, b, f) \
do { \
const u8 *u_lo = GF2_8_SHUF_LO + u * 16; \
const u8 *u_hi = GF2_8_SHUF_HI + u * 16; \
const __m128i mask = _mm_set1_epi8(0x0f); \
const __m128i urow_lo = _mm_loadu_si128((__m128i *)u_lo); \
const __m128i urow_hi = _mm_loadu_si128((__m128i *)u_hi); \
__m128i *ap = (__m128i *)a, *ae = (__m128i *)(a + k - (k % sizeof(__m128i))), *bp = (__m128i *)b; \
for (; ap < ae; ap++, bp++) { \
__m128i bx = _mm_loadu_si128(bp); \
__m128i lo = _mm_and_si128(bx, mask); \
bx = _mm_srli_epi64(bx, 4); \
__m128i hi = _mm_and_si128(bx, mask); \
lo = _mm_shuffle_epi8(urow_lo, lo); \
hi = _mm_shuffle_epi8(urow_hi, hi); \
_mm_storeu_si128(ap, f(_mm_loadu_si128(ap), _mm_xor_si128(lo, hi))); \
} \
op##_ref((u8 *)ap, (u8 *)bp, u, k % sizeof(__m128i)); \
} while (0)
#undef OBL_SHUF_XOR
#define OBL_SHUF_XOR _mm_xor_si128
#undef OBL_AXPYB32
#define OBL_AXPYB32(a, b, u, k) \
do { \
__m128i *ap = (__m128i *)a, *ae = (__m128i *)(a + k); \
__m128i scatter_hi = _mm_set_epi32(0x03030303, 0x03030303, 0x02020202, 0x02020202); \
__m128i scatter_lo = _mm_set_epi32(0x01010101, 0x01010101, 0x00000000, 0x00000000); \
__m128i cmpmask = _mm_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201); \
__m128i up = _mm_set1_epi8(u); \
for (unsigned p = 0; ap < ae; p++, ap++) { \
__m128i bcast = _mm_set1_epi32(b[p]); \
__m128i ret_lo = _mm_shuffle_epi8(bcast, scatter_lo); \
__m128i ret_hi = _mm_shuffle_epi8(bcast, scatter_hi); \
ret_lo = _mm_andnot_si128(ret_lo, cmpmask); \
ret_hi = _mm_andnot_si128(ret_hi, cmpmask); \
ret_lo = _mm_and_si128(_mm_cmpeq_epi8(ret_lo, _mm_setzero_si128()), up); \
ret_hi = _mm_and_si128(_mm_cmpeq_epi8(ret_hi, _mm_setzero_si128()), up); \
_mm_storeu_si128(ap, _mm_xor_si128(_mm_loadu_si128(ap), ret_lo)); \
ap++; \
_mm_storeu_si128(ap, _mm_xor_si128(_mm_loadu_si128(ap), ret_hi)); \
} \
} while (0)
#else
#undef OBLAS_ALIGN
#define OBLAS_ALIGN (sizeof(void *))
#undef OBL_SHUF
#define OBL_SHUF(op, a, b, f) \
do { \
op##_ref(a, b, u, k); \
} while (0)
#undef OBL_SHUF_XOR
#define OBL_SHUF_XOR
#undef OBL_AXPYB32
#define OBL_AXPYB32 obl_axpyb32_ref
#endif
#endif
#endif
#define OBL_NOOP(a, b) (b)
void obl_axpy(u8 *a, u8 *b, u8 u, unsigned k)
{
if (u == 1) {
register u8 *ap = a, *ae = &a[k], *bp = b;
for (; ap < ae; ap++, bp++)
*ap ^= *bp;
} else {
OBL_SHUF(obl_axpy, a, b, OBL_SHUF_XOR);
}
}
void obl_scal(u8 *a, u8 u, unsigned k)
{
OBL_SHUF(obl_scal, a, a, OBL_NOOP);
}
void obl_swap(u8 *a, u8 *b, unsigned k)
{
register u8 *ap = a, *ae = &a[k], *bp = b;
for (; ap < ae; ap++, bp++) {
u8 tmp = *ap;
*ap = *bp;
*bp = tmp;
}
}
void obl_axpyb32(u8 *a, u32 *b, u8 u, unsigned k)
{
OBL_AXPYB32(a, b, u, k);
}

View File

@@ -0,0 +1,11 @@
#include <stdint.h>
#include "gf2_8_tables.h"
typedef uint8_t u8;
typedef uint32_t u32;
void obl_axpy(u8 *a, u8 *b, u8 u, unsigned k);
void obl_scal(u8 *a, u8 u, unsigned k);
void obl_swap(u8 *a, u8 *b, unsigned k);
void obl_axpyb32(u8 *a, u32 *b, u8 u, unsigned k);

178
nanors/rs.c Normal file
View File

@@ -0,0 +1,178 @@
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "oblas_lite.c"
#include "rs.h"
static void axpy(u8 *a, u8 *b, u8 u, int k)
{
if (u == 0)
return;
if (u == 1) {
register u8 *ap = a, *ae = &a[k], *bp = b;
for (; ap < ae; ap++, bp++)
*ap ^= *bp;
} else {
obl_axpy(a, b, u, k);
}
}
static void scal(u8 *a, u8 u, int k)
{
if (u < 2)
return;
obl_scal(a, u, k);
}
static void gemm(u8 *a, u8 **b, u8 **c, int n, int k, int m)
{
int ci = 0;
for (int row = 0; row < n; row++, ci++) {
u8 *ap = a + (row * k);
memset(c[ci], 0, m);
for (int idx = 0; idx < k; idx++)
axpy(c[ci], b[idx], ap[idx], m);
}
}
static int invert_mat(u8 *src, u8 *wrk, u8 **dst, int V0, int K, int T, u8 *c, u8 *d)
{
int V0b = V0, W = K - V0;
u8 u = 0;
for (int i = 0; i < W; i++) {
int dr = d[i] * K;
for (int j = 0; j < W; j++)
wrk[i * W + j] = src[dr + c[V0 + j]];
}
for (; V0 < K; V0++) {
int dr = d[V0 - V0b] * K;
for (int row = 0; row < V0b; row++) {
u = src[dr + c[row]];
axpy(dst[c[V0]], dst[c[row]], u, T);
}
}
for (int x = 0; x < W; x++) {
u = GF2_8_INV[wrk[x * W + x]];
scal(wrk + x * W + x, u, W);
scal(dst[c[V0b + x]], u, T);
for (int row = x + 1; row < W; row++) {
u = wrk[row * W + x];
axpy(wrk + row * W, wrk + x * W, u, W);
axpy(dst[c[V0b + row]], dst[c[V0b + x]], u, T);
}
}
for (int x = W - 1; x >= 0; x--) {
u8 *from = dst[c[V0b + x]];
for (int row = 0; row < x; row++) {
u = wrk[row * W + x];
axpy(dst[c[V0b + row]], from, u, T);
}
}
return 0;
}
void reed_solomon_init(void)
{
}
reed_solomon *reed_solomon_new_static(void *buf, size_t len, int ds, int ps)
{
reed_solomon *rs = buf;
if ((ds + ps) > DATA_SHARDS_MAX || ds <= 0 || ps <= 0)
return NULL;
if (len < reed_solomon_bufsize(ds, ps))
return NULL;
memset(buf, 0, len);
rs->ds = ds;
rs->ps = ps;
rs->ts = ds + ps;
for (int j = 0; j < rs->ps; j++) {
u8 *row = rs->p + j * rs->ds;
for (int i = 0; i < rs->ds; i++)
row[i] = GF2_8_INV[(rs->ps + i) ^ j];
}
return rs;
}
reed_solomon *reed_solomon_new(int ds, int ps)
{
size_t len = reed_solomon_bufsize(ds, ps);
void *buf = malloc(len);
if (!buf)
return NULL;
if (reed_solomon_new_static(buf, len, ds, ps) == NULL) {
free(buf);
return NULL;
}
return buf;
}
void reed_solomon_release(reed_solomon *rs)
{
if (rs)
free(rs);
}
int reed_solomon_decode(reed_solomon *rs, u8 **data, u8 *marks, int nr_shards, int bs)
{
if (nr_shards < rs->ts)
return -1;
#ifdef _MSC_VER
u8 *erasures = _alloca(rs->ds);
u8 *colperm = _alloca(rs->ds);
u8 *rowperm = _alloca(rs->ds);
#else
u8 erasures[rs->ds], colperm[rs->ds], rowperm[rs->ds];
#endif
u8 *wrk = rs->p + 1 * rs->ps * rs->ds;
u8 gaps = 0;
for (int i = 0; i < rs->ds; i++)
if (marks[i])
erasures[gaps++] = i;
for (int i = 0, j = 0; i < rs->ds - gaps; i++, j++) {
while (marks[j])
j++;
colperm[i] = j;
}
for (int i = 0, j = rs->ds - gaps; i < gaps; i++, j++)
colperm[j] = erasures[i];
int i = 0;
for (int j = rs->ds; i < gaps; i++, j++) {
while (marks[j])
j++;
if (j >= nr_shards)
break;
rowperm[i] = j - rs->ds;
memcpy(data[erasures[i]], data[j], bs);
}
if (i < gaps)
return -1;
invert_mat(rs->p, wrk, data, rs->ds - gaps, rs->ds, bs, colperm, rowperm);
return 0;
}
int reed_solomon_encode(reed_solomon *rs, u8 **shards, int nr_shards, int bs)
{
if (nr_shards < rs->ts)
return -1;
gemm(rs->p, shards, shards + rs->ds, rs->ps, rs->ds, bs);
return 0;
}

26
nanors/rs.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef __RS_H_
#define __RS_H_
#include <stdint.h>
#define DATA_SHARDS_MAX 255
typedef struct _reed_solomon {
int ds;
int ps;
int ts;
uint8_t p[];
} reed_solomon;
#define reed_solomon_bufsize(ds, ps) (sizeof(reed_solomon) + 2 * (ps) * (ds))
#define reed_solomon_reconstruct reed_solomon_decode
void reed_solomon_init(void);
reed_solomon *reed_solomon_new_static(void *buf, size_t len, int ds, int ps);
reed_solomon *reed_solomon_new(int data_shards, int parity_shards);
void reed_solomon_release(reed_solomon *rs);
int reed_solomon_encode(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs);
int reed_solomon_decode(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs);
#endif

View File

@@ -1,639 +0,0 @@
/*
* fec.c -- forward error correction based on Vandermonde matrices
*
* (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
* (C) 2001 Alain Knaff (alain@knaff.lu)
* (C) 2017 Iwan Timmer (irtimmer@gmail.com)
*
* Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
* Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
* Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "rs.h"
#ifdef _MSC_VER
#define NEED_ALLOCA
#define alloca(x) _alloca(x)
#endif
typedef unsigned char gf;
#define GF_BITS 8
#define GF_PP "101110001"
#define GF_SIZE ((1 << GF_BITS) - 1)
#define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}
/*
* USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
* many numbers by the same constant. In this case the first
* call sets the constant, and others perform the multiplications.
* A value related to the multiplication is held in a local variable
* declared with USE_GF_MULC . See usage in addmul1().
*/
#define USE_GF_MULC register gf * __gf_mulc_
#define GF_MULC0(c) __gf_mulc_ = &gf_mul_table[(c)<<8]
#define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
#define GF_MULC(dst, x) dst = __gf_mulc_[x]
#define gf_mul(x,y) gf_mul_table[(x<<8)+y]
/*
* To speed up computations, we have tables for logarithm, exponent
* multiplication and inverse of a number.
*/
static gf gf_exp[2*GF_SIZE];
static int gf_log[GF_SIZE + 1];
static gf inverse[GF_SIZE+1];
#ifdef _MSC_VER
static gf __declspec(align (256)) gf_mul_table[(GF_SIZE + 1)*(GF_SIZE + 1)];
#else
static gf gf_mul_table[(GF_SIZE + 1)*(GF_SIZE + 1)] __attribute__((aligned (256)));
#endif
/*
* modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
* without a slow divide.
*/
static inline gf modnn(int x) {
while (x >= GF_SIZE) {
x -= GF_SIZE;
x = (x >> GF_BITS) + (x & GF_SIZE);
}
return x;
}
static void addmul(gf *dst1, gf *src1, gf c, int sz) {
USE_GF_MULC;
if (c != 0) {
register gf *dst = dst1, *src = src1;
gf *lim = &dst[sz];
GF_MULC0(c);
for (; dst < lim; dst++, src++)
GF_ADDMULC(*dst, *src);
}
}
static void mul(gf *dst1, gf *src1, gf c, int sz) {
USE_GF_MULC;
if (c != 0) {
register gf *dst = dst1, *src = src1;
gf *lim = &dst[sz];
GF_MULC0(c);
for (; dst < lim; dst++, src++)
GF_MULC(*dst , *src);
} else
memset(dst1, 0, c);
}
/* y = a.dot(b) */
static gf* multiply1(gf *a, int ar, int ac, gf *b, int br, int bc) {
gf *new_m, tg;
int r, c, i, ptr = 0;
assert(ac == br);
new_m = (gf*) calloc(1, ar*bc);
if (NULL != new_m) {
/* this multiply is slow */
for (r = 0; r < ar; r++) {
for (c = 0; c < bc; c++) {
tg = 0;
for (i = 0; i < ac; i++)
tg ^= gf_mul(a[r*ac+i], b[i*bc+c]);
new_m[ptr++] = tg;
}
}
}
return new_m;
}
static void init_mul_table(void) {
int i, j;
for (i=0; i< GF_SIZE+1; i++)
for (j=0; j< GF_SIZE+1; j++)
gf_mul_table[(i<<8)+j] = gf_exp[modnn(gf_log[i] + gf_log[j]) ] ;
for (j=0; j< GF_SIZE+1; j++)
gf_mul_table[j] = gf_mul_table[j<<8] = 0;
}
/*
* initialize the data structures used for computations in GF.
*/
static void generate_gf(void) {
int i;
gf mask;
mask = 1;
gf_exp[GF_BITS] = 0;
/*
* first, generate the (polynomial representation of) powers of \alpha,
* which are stored in gf_exp[i] = \alpha ** i .
* At the same time build gf_log[gf_exp[i]] = i .
* The first GF_BITS powers are simply bits shifted to the left.
*/
for (i = 0; i < GF_BITS; i++, mask <<= 1) {
gf_exp[i] = mask;
gf_log[gf_exp[i]] = i;
/*
* If GF_PP[i] == 1 then \alpha ** i occurs in poly-repr
* gf_exp[GF_BITS] = \alpha ** GF_BITS
*/
if (GF_PP[i] == '1')
gf_exp[GF_BITS] ^= mask;
}
/*
* now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can als
* compute its inverse.
*/
gf_log[gf_exp[GF_BITS]] = GF_BITS;
/*
* Poly-repr of \alpha ** (i+1) is given by poly-repr of
* \alpha ** i shifted left one-bit and accounting for any
* \alpha ** GF_BITS term that may occur when poly-repr of
* \alpha ** i is shifted.
*/
mask = 1 << (GF_BITS - 1) ;
for (i = GF_BITS + 1; i < GF_SIZE; i++) {
if (gf_exp[i - 1] >= mask)
gf_exp[i] = gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
else
gf_exp[i] = gf_exp[i - 1] << 1;
gf_log[gf_exp[i]] = i;
}
/*
* log(0) is not defined, so use a special value
*/
gf_log[0] = GF_SIZE;
/* set the extended gf_exp values for fast multiply */
for (i = 0; i < GF_SIZE; i++)
gf_exp[i + GF_SIZE] = gf_exp[i];
/*
* again special cases. 0 has no inverse. This used to
* be initialized to GF_SIZE, but it should make no difference
* since noone is supposed to read from here.
*/
inverse[0] = 0;
inverse[1] = 1;
for (i=2; i<=GF_SIZE; i++)
inverse[i] = gf_exp[GF_SIZE-gf_log[i]];
}
/*
* invert_mat() takes a matrix and produces its inverse
* k is the size of the matrix.
* (Gauss-Jordan, adapted from Numerical Recipes in C)
* Return non-zero if singular.
*/
static int invert_mat(gf *src, int k) {
gf c, *p;
int irow, icol, row, col, i, ix;
int error = 1;
#ifdef NEED_ALLOCA
int *indxc = alloca(k*sizeof(int));
int *indxr = alloca(k*sizeof(int));
int *ipiv = alloca(k*sizeof(int));
gf *id_row = alloca(k*sizeof(gf));
#else
int indxc[k];
int indxr[k];
int ipiv[k];
gf id_row[k];
#endif
memset(id_row, 0, k*sizeof(gf));
/*
* ipiv marks elements already used as pivots.
*/
for (i = 0; i < k; i++)
ipiv[i] = 0;
for (col = 0; col < k; col++) {
gf *pivot_row;
/*
* Zeroing column 'col', look for a non-zero element.
* First try on the diagonal, if it fails, look elsewhere.
*/
irow = icol = -1;
if (ipiv[col] != 1 && src[col*k + col] != 0) {
irow = col;
icol = col;
goto found_piv;
}
for (row = 0; row < k; row++) {
if (ipiv[row] != 1) {
for (ix = 0; ix < k; ix++) {
if (ipiv[ix] == 0) {
if (src[row*k + ix] != 0) {
irow = row;
icol = ix;
goto found_piv;
}
} else if (ipiv[ix] > 1) {
fprintf(stderr, "singular matrix\n");
goto fail;
}
}
}
}
if (icol == -1) {
fprintf(stderr, "XXX pivot not found!\n");
goto fail ;
}
found_piv:
++(ipiv[icol]);
/*
* swap rows irow and icol, so afterwards the diagonal
* element will be correct. Rarely done, not worth
* optimizing.
*/
if (irow != icol) {
for (ix = 0; ix < k; ix++) {
SWAP(src[irow*k + ix], src[icol*k + ix], gf);
}
}
indxr[col] = irow;
indxc[col] = icol;
pivot_row = &src[icol*k];
c = pivot_row[icol];
if (c == 0) {
fprintf(stderr, "singular matrix 2\n");
goto fail;
} else if (c != 1 ) {
/*
* this is done often , but optimizing is not so
* fruitful, at least in the obvious ways (unrolling)
*/
c = inverse[ c ];
pivot_row[icol] = 1;
for (ix = 0; ix < k; ix++)
pivot_row[ix] = gf_mul(c, pivot_row[ix]);
}
/*
* from all rows, remove multiples of the selected row
* to zero the relevant entry (in fact, the entry is not zero
* because we know it must be zero).
* (Here, if we know that the pivot_row is the identity,
* we can optimize the addmul).
*/
id_row[icol] = 1;
if (memcmp(pivot_row, id_row, k*sizeof(gf)) != 0) {
for (p = src, ix = 0 ; ix < k ; ix++, p += k) {
if (ix != icol) {
c = p[icol];
p[icol] = 0;
addmul(p, pivot_row, c, k);
}
}
}
id_row[icol] = 0;
}
for (col = k-1 ; col >= 0 ; col-- ) {
if (indxr[col] <0 || indxr[col] >= k)
fprintf(stderr, "AARGH, indxr[col] %d\n", indxr[col]);
else if (indxc[col] <0 || indxc[col] >= k)
fprintf(stderr, "AARGH, indxc[col] %d\n", indxc[col]);
else
if (indxr[col] != indxc[col] ) {
for (row = 0 ; row < k ; row++ )
SWAP( src[row*k + indxr[col]], src[row*k + indxc[col]], gf);
}
}
error = 0;
fail:
return error ;
}
/*
* Not check for input params
* */
static gf* sub_matrix(gf* matrix, int rmin, int cmin, int rmax, int cmax, int nrows, int ncols) {
int i, j, ptr = 0;
gf* new_m = (gf*) malloc((rmax-rmin) * (cmax-cmin));
if (NULL != new_m) {
for (i = rmin; i < rmax; i++) {
for (j = cmin; j < cmax; j++) {
new_m[ptr++] = matrix[i*ncols + j];
}
}
}
return new_m;
}
/* copy from golang rs version */
static inline int code_some_shards(gf* matrixRows, gf** inputs, gf** outputs, int dataShards, int outputCount, int byteCount) {
gf* in;
int iRow, c;
for (c = 0; c < dataShards; c++) {
in = inputs[c];
for (iRow = 0; iRow < outputCount; iRow++) {
if (0 == c)
mul(outputs[iRow], in, matrixRows[iRow*dataShards+c], byteCount);
else
addmul(outputs[iRow], in, matrixRows[iRow*dataShards+c], byteCount);
}
}
return 0;
}
void reed_solomon_init(void) {
generate_gf();
init_mul_table();
}
reed_solomon* reed_solomon_new(int data_shards, int parity_shards) {
gf* vm = NULL;
gf* top = NULL;
int err = 0;
reed_solomon* rs = NULL;
do {
rs = malloc(sizeof(reed_solomon));
if (NULL == rs)
return NULL;
rs->data_shards = data_shards;
rs->parity_shards = parity_shards;
rs->shards = (data_shards + parity_shards);
rs->m = NULL;
rs->parity = NULL;
if (rs->shards > DATA_SHARDS_MAX || data_shards <= 0 || parity_shards <= 0) {
err = 1;
break;
}
vm = (gf*)malloc(data_shards * rs->shards);
if (NULL == vm) {
err = 2;
break;
}
int ptr = 0;
for (int row = 0; row < rs->shards; row++) {
for (int col = 0; col < data_shards; col++)
vm[ptr++] = row == col ? 1 : 0;
}
top = sub_matrix(vm, 0, 0, data_shards, data_shards, rs->shards, data_shards);
if (NULL == top) {
err = 3;
break;
}
err = invert_mat(top, data_shards);
assert(0 == err);
rs->m = multiply1(vm, rs->shards, data_shards, top, data_shards, data_shards);
if (NULL == rs->m) {
err = 4;
break;
}
for (int j = 0; j < parity_shards; j++) {
for (int i = 0; i < data_shards; i++)
rs->m[(data_shards + j)*data_shards + i] = inverse[(parity_shards + i) ^ j];
}
rs->parity = sub_matrix(rs->m, data_shards, 0, rs->shards, data_shards, rs->shards, data_shards);
if (NULL == rs->parity) {
err = 5;
break;
}
free(vm);
free(top);
vm = NULL;
top = NULL;
return rs;
} while(0);
fprintf(stderr, "err=%d\n", err);
if (NULL != vm)
free(vm);
if (NULL != top)
free(top);
if (NULL != rs) {
if (NULL != rs->m)
free(rs->m);
if (NULL != rs->parity)
free(rs->parity);
free(rs);
}
return NULL;
}
void reed_solomon_release(reed_solomon* rs) {
if (NULL != rs) {
if (NULL != rs->m)
free(rs->m);
if (NULL != rs->parity)
free(rs->parity);
free(rs);
}
}
/**
* decode one shard
* input:
* rs
* original data_blocks[rs->data_shards][block_size]
* dec_fec_blocks[nr_fec_blocks][block_size]
* fec_block_nos: fec pos number in original fec_blocks
* erased_blocks: erased blocks in original data_blocks
* nr_fec_blocks: the number of erased blocks
* */
static int reed_solomon_decode(reed_solomon* rs, unsigned char **data_blocks, int block_size, unsigned char **dec_fec_blocks, unsigned int *fec_block_nos, unsigned int *erased_blocks, int nr_fec_blocks) {
/* use stack instead of malloc, define a small number of DATA_SHARDS_MAX to save memory */
gf dataDecodeMatrix[DATA_SHARDS_MAX*DATA_SHARDS_MAX];
unsigned char* subShards[DATA_SHARDS_MAX];
unsigned char* outputs[DATA_SHARDS_MAX];
gf* m = rs->m;
int i, j, c, swap, subMatrixRow, dataShards;
/* the erased_blocks should always sorted
* if sorted, nr_fec_blocks times to check it
* if not, sort it here
* */
for (i = 0; i < nr_fec_blocks; i++) {
swap = 0;
for (j = i+1; j < nr_fec_blocks; j++) {
if (erased_blocks[i] > erased_blocks[j]) {
/* the prefix is bigger than the following, swap */
c = erased_blocks[i];
erased_blocks[i] = erased_blocks[j];
erased_blocks[j] = c;
swap = 1;
}
}
if (!swap)
break;
}
j = 0;
subMatrixRow = 0;
dataShards = rs->data_shards;
for (i = 0; i < dataShards; i++) {
if (j < nr_fec_blocks && i == (int)erased_blocks[j])
j++;
else {
/* this row is ok */
for (c = 0; c < dataShards; c++)
dataDecodeMatrix[subMatrixRow*dataShards + c] = m[i*dataShards + c];
subShards[subMatrixRow] = data_blocks[i];
subMatrixRow++;
}
}
for (i = 0; i < nr_fec_blocks && subMatrixRow < dataShards; i++) {
subShards[subMatrixRow] = dec_fec_blocks[i];
j = dataShards + fec_block_nos[i];
for (c = 0; c < dataShards; c++)
dataDecodeMatrix[subMatrixRow*dataShards + c] = m[j*dataShards + c];
subMatrixRow++;
}
if (subMatrixRow < dataShards)
return -1;
invert_mat(dataDecodeMatrix, dataShards);
for (i = 0; i < nr_fec_blocks; i++) {
j = erased_blocks[i];
outputs[i] = data_blocks[j];
memmove(dataDecodeMatrix+i*dataShards, dataDecodeMatrix+j*dataShards, dataShards);
}
return code_some_shards(dataDecodeMatrix, subShards, outputs, dataShards, nr_fec_blocks, block_size);
}
/**
* encode a big size of buffer
* input:
* rs
* nr_shards: assert(0 == nr_shards % rs->shards)
* shards[nr_shards][block_size]
* */
int reed_solomon_encode(reed_solomon* rs, unsigned char** shards, int nr_shards, int block_size) {
unsigned char** data_blocks;
unsigned char** fec_blocks;
int i, ds = rs->data_shards, ps = rs->parity_shards, ss = rs->shards;
i = nr_shards / ss;
data_blocks = shards;
fec_blocks = &shards[(i*ds)];
for (i = 0; i < nr_shards; i += ss) {
code_some_shards(rs->parity, data_blocks, fec_blocks, rs->data_shards, rs->parity_shards, block_size);
data_blocks += ds;
fec_blocks += ps;
}
return 0;
}
/**
* reconstruct a big size of buffer
* input:
* rs
* nr_shards: assert(0 == nr_shards % rs->data_shards)
* shards[nr_shards][block_size]
* marks[nr_shards] marks as errors
* */
int reed_solomon_reconstruct(reed_solomon* rs, unsigned char** shards, unsigned char* marks, int nr_shards, int block_size) {
unsigned char *dec_fec_blocks[DATA_SHARDS_MAX];
unsigned int fec_block_nos[DATA_SHARDS_MAX];
unsigned int erased_blocks[DATA_SHARDS_MAX];
unsigned char* fec_marks;
unsigned char **data_blocks, **fec_blocks;
int i, j, dn, pn, n;
int ds = rs->data_shards;
int ps = rs->parity_shards;
int err = 0;
data_blocks = shards;
n = nr_shards / rs->shards;
fec_marks = marks + n*ds; //after all data, is't fec marks
fec_blocks = shards + n*ds;
for (j = 0; j < n; j++) {
dn = 0;
for (i = 0; i < ds; i++) {
if (marks[i])
erased_blocks[dn++] = i;
}
if (dn > 0) {
pn = 0;
for (i = 0; i < ps && pn < dn; i++) {
if (!fec_marks[i]) {
//got valid fec row
fec_block_nos[pn] = i;
dec_fec_blocks[pn] = fec_blocks[i];
pn++;
}
}
if (dn == pn) {
reed_solomon_decode(rs, data_blocks, block_size, dec_fec_blocks, fec_block_nos, erased_blocks, dn);
} else
err = -1;
}
data_blocks += ds;
marks += ds;
fec_blocks += ps;
fec_marks += ps;
}
return err;
}

View File

@@ -1,42 +0,0 @@
#ifndef __RS_H_
#define __RS_H_
/* use small value to save memory */
#define DATA_SHARDS_MAX 255
typedef struct _reed_solomon {
int data_shards;
int parity_shards;
int shards;
unsigned char* m;
unsigned char* parity;
} reed_solomon;
/**
* MUST initial one time
* */
void reed_solomon_init(void);
reed_solomon* reed_solomon_new(int data_shards, int parity_shards);
void reed_solomon_release(reed_solomon* rs);
/**
* encode a big size of buffer
* input:
* rs
* nr_shards: assert(0 == nr_shards % rs->data_shards)
* shards[nr_shards][block_size]
* */
int reed_solomon_encode(reed_solomon* rs, unsigned char** shards, int nr_shards, int block_size);
/**
* reconstruct a big size of buffer
* input:
* rs
* nr_shards: assert(0 == nr_shards % rs->data_shards)
* shards[nr_shards][block_size]
* marks[nr_shards] marks as errors
* */
int reed_solomon_reconstruct(reed_solomon* rs, unsigned char** shards, unsigned char* marks, int nr_shards, int block_size);
#endif

View File

@@ -55,8 +55,7 @@ void RtpaInitializeQueue(PRTP_AUDIO_QUEUE queue) {
// works correctly. This is possible because the data and FEC shard count is
// constant and known in advance.
const unsigned char parity[] = { 0x77, 0x40, 0x38, 0x0e, 0xc7, 0xa7, 0x0d, 0x6c };
memcpy(&queue->rs->m[16], parity, sizeof(parity));
memcpy(queue->rs->parity, parity, sizeof(parity));
memcpy(queue->rs->p, parity, sizeof(parity));
}
static void validateFecBlockState(PRTP_AUDIO_QUEUE queue) {
@@ -444,7 +443,7 @@ static bool completeFecBlock(PRTP_AUDIO_QUEUE queue, PRTPA_FEC_BLOCK block) {
memset(block->dataPackets[dropIndex], 0, sizeof(RTP_PACKET) + block->blockSize);
#endif
int res = reed_solomon_reconstruct(queue->rs, shards, block->marks, RTPA_TOTAL_SHARDS, block->blockSize);
int res = reed_solomon_decode(queue->rs, shards, block->marks, RTPA_TOTAL_SHARDS, block->blockSize);
if (res != 0) {
// We should always have enough data to recover the entire block since we checked above.
LC_ASSERT(res == 0);

View File

@@ -2,7 +2,14 @@
#include "Video.h"
#include "rs.h"
#include "rswrapper.h"
typedef struct _reed_solomon {
int ds;
int ps;
int ts;
uint8_t p[];
} reed_solomon;
// Maximum time to wait for an OOS data/FEC shard
// after the entire FEC block should have been received

View File

@@ -1,5 +1,5 @@
#include "Limelight-internal.h"
#include "rs.h"
#include "rswrapper.h"
#if defined(LC_DEBUG) && !defined(LC_FUZZING)
// This enables FEC validation mode with a synthetic drop
@@ -328,7 +328,7 @@ static int reconstructFrame(PRTP_VIDEO_QUEUE queue) {
}
}
ret = reed_solomon_reconstruct(rs, packets, marks, totalPackets, receiveSize);
ret = reed_solomon_decode(rs, packets, marks, totalPackets, receiveSize);
// We should always provide enough parity to recover the missing data successfully.
// If this fails, something is probably wrong with our FEC state.

191
src/rswrapper.c Normal file
View File

@@ -0,0 +1,191 @@
/**
* @file src/rswrapper.c
* @brief Wrappers for nanors vectorization with different ISA options
*/
// _FORTIY_SOURCE can cause some versions of GCC to try to inline
// memset() with incompatible target options when compiling rs.c
#ifdef _FORTIFY_SOURCE
#undef _FORTIFY_SOURCE
#endif
// The assert() function is decorated with __cold on macOS which
// is incompatible with Clang's target multiversioning feature
#ifndef NDEBUG
#define NDEBUG
#endif
#define DECORATE_FUNC_I(a, b) a##b
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b)
// Append an ISA suffix to the public RS API
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX)
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX)
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX)
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX)
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX)
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX)
// Append an ISA suffix to internal functions to prevent multiple definition errors
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX)
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX)
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX)
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX)
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX)
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX)
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX)
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX)
#define scal DECORATE_FUNC(scal, ISA_SUFFIX)
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX)
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX)
#if defined(__x86_64__) || defined(__i386__) || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)))
// Compile a variant for SSSE3
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("ssse3"))), apply_to = function)
#elif __GNUC__
#pragma GCC push_options
#pragma GCC target("ssse3")
#endif
#define ISA_SUFFIX _ssse3
#define OBLAS_SSE3
#include "../nanors/rs.c"
#undef OBLAS_SSE3
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#elif __GNUC__
#pragma GCC pop_options
#endif
// Compile a variant for AVX2
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
#elif __GNUC__
#pragma GCC push_options
#pragma GCC target("avx2")
#endif
#define ISA_SUFFIX _avx2
#define OBLAS_AVX2
#include "../nanors/rs.c"
#undef OBLAS_AVX2
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#elif __GNUC__
#pragma GCC pop_options
#endif
// Compile a variant for AVX512BW
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx512f,avx512bw"))), apply_to = function)
#elif __GNUC__
#pragma GCC push_options
#pragma GCC target("avx512f,avx512bw")
#endif
#define ISA_SUFFIX _avx512
#define OBLAS_AVX512
#include "../nanors/rs.c"
#undef OBLAS_AVX512
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#elif __GNUC__
#pragma GCC pop_options
#endif
#endif
// Compile a default variant
#define ISA_SUFFIX _def
#include "../nanors/deps/obl/autoshim.h"
#include "../nanors/rs.c"
#undef ISA_SUFFIX
#undef reed_solomon_init
#undef reed_solomon_new
#undef reed_solomon_new_static
#undef reed_solomon_release
#undef reed_solomon_decode
#undef reed_solomon_encode
#include "rswrapper.h"
reed_solomon_new_t reed_solomon_new_fn;
reed_solomon_release_t reed_solomon_release_fn;
reed_solomon_encode_t reed_solomon_encode_fn;
reed_solomon_decode_t reed_solomon_decode_fn;
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
#if defined(_M_AMD64)
// For some reason this is needed to avoid a "C1189 No target architecture" error from winnt.h
# define _AMD64_
#endif
#include <processthreadsapi.h>
BOOL _msc_supports_ssse3(void) { return IsProcessorFeaturePresent(PF_SSSE3_INSTRUCTIONS_AVAILABLE); }
BOOL _msc_supports_avx2(void) { return IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE); }
BOOL _msc_supports_avx512f(void) { return IsProcessorFeaturePresent(PF_AVX512F_INSTRUCTIONS_AVAILABLE); }
#endif
/**
* @brief This initializes the RS function pointers to the best vectorized version available.
* @details The streaming code will directly invoke these function pointers during encoding.
*/
void reed_solomon_init(void) {
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
// Visual Studio
if (_msc_supports_avx512f()) {
reed_solomon_new_fn = reed_solomon_new_avx512;
reed_solomon_release_fn = reed_solomon_release_avx512;
reed_solomon_encode_fn = reed_solomon_encode_avx512;
reed_solomon_decode_fn = reed_solomon_decode_avx512;
reed_solomon_init_avx512();
} else if (_msc_supports_avx2()) {
reed_solomon_new_fn = reed_solomon_new_avx2;
reed_solomon_release_fn = reed_solomon_release_avx2;
reed_solomon_encode_fn = reed_solomon_encode_avx2;
reed_solomon_decode_fn = reed_solomon_decode_avx2;
reed_solomon_init_avx2();
} else if (_msc_supports_ssse3()) {
reed_solomon_new_fn = reed_solomon_new_ssse3;
reed_solomon_release_fn = reed_solomon_release_ssse3;
reed_solomon_encode_fn = reed_solomon_encode_ssse3;
reed_solomon_decode_fn = reed_solomon_decode_ssse3;
reed_solomon_init_ssse3();
} else
#elif defined(__x86_64__)
// gcc & clang
if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) {
reed_solomon_new_fn = reed_solomon_new_avx512;
reed_solomon_release_fn = reed_solomon_release_avx512;
reed_solomon_encode_fn = reed_solomon_encode_avx512;
reed_solomon_decode_fn = reed_solomon_decode_avx512;
reed_solomon_init_avx512();
} else if (__builtin_cpu_supports("avx2")) {
reed_solomon_new_fn = reed_solomon_new_avx2;
reed_solomon_release_fn = reed_solomon_release_avx2;
reed_solomon_encode_fn = reed_solomon_encode_avx2;
reed_solomon_decode_fn = reed_solomon_decode_avx2;
reed_solomon_init_avx2();
} else if (__builtin_cpu_supports("ssse3")) {
reed_solomon_new_fn = reed_solomon_new_ssse3;
reed_solomon_release_fn = reed_solomon_release_ssse3;
reed_solomon_encode_fn = reed_solomon_encode_ssse3;
reed_solomon_decode_fn = reed_solomon_decode_ssse3;
reed_solomon_init_ssse3();
} else
#endif
//
{
reed_solomon_new_fn = reed_solomon_new_def;
reed_solomon_release_fn = reed_solomon_release_def;
reed_solomon_encode_fn = reed_solomon_encode_def;
reed_solomon_decode_fn = reed_solomon_decode_def;
reed_solomon_init_def();
}
}

32
src/rswrapper.h Normal file
View File

@@ -0,0 +1,32 @@
/**
* @file src/rswrapper.h
* @brief Wrappers for nanors vectorization
* @details This is a drop-in replacement for nanors rs.h
*/
#pragma once
// standard includes
#include <stdint.h>
typedef struct _reed_solomon reed_solomon;
typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards);
typedef void (*reed_solomon_release_t)(reed_solomon *rs);
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs);
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs);
extern reed_solomon_new_t reed_solomon_new_fn;
extern reed_solomon_release_t reed_solomon_release_fn;
extern reed_solomon_encode_t reed_solomon_encode_fn;
extern reed_solomon_decode_t reed_solomon_decode_fn;
#define reed_solomon_new reed_solomon_new_fn
#define reed_solomon_release reed_solomon_release_fn
#define reed_solomon_encode reed_solomon_encode_fn
#define reed_solomon_decode reed_solomon_decode_fn
/**
* @brief This initializes the RS function pointers to the best vectorized version available.
* @details The streaming code will directly invoke these function pointers during encoding.
*/
void reed_solomon_init(void);