mirror of
https://github.com/moonlight-stream/moonlight-common-c.git
synced 2026-04-02 22:06:10 +00:00
Use nanors for optimized Reed-Solomon FEC decoding (#125)
This commit is contained in:
16
nanors/deps/obl/autoshim.h
Normal file
16
nanors/deps/obl/autoshim.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#if defined(__AVX512F__)
|
||||
#define OBLAS_AVX512
|
||||
#else
|
||||
#if defined(__AVX2__)
|
||||
#define OBLAS_AVX2
|
||||
#else
|
||||
#if defined(__SSSE3__) || (defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64))
|
||||
#define OBLAS_SSE3
|
||||
#else
|
||||
#if defined(__aarch64__) || (defined(_MSC_VER) && defined(_M_ARM64))
|
||||
// To be replaced with SIMDe
|
||||
// #define OBLAS_NEON
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
4106
nanors/deps/obl/gf2_8_mul_table.h
Normal file
4106
nanors/deps/obl/gf2_8_mul_table.h
Normal file
File diff suppressed because it is too large
Load Diff
603
nanors/deps/obl/gf2_8_tables.h
Normal file
603
nanors/deps/obl/gf2_8_tables.h
Normal file
@@ -0,0 +1,603 @@
|
||||
/* these tables were generated with polynomial: 285 */
|
||||
|
||||
#ifndef GF2_8_TABLES
|
||||
#define GF2_8_TABLES
|
||||
|
||||
/* clang-format off */
|
||||
static const uint8_t GF2_8_LOG[] =
|
||||
{
|
||||
255, 0, 1, 25, 2, 50, 26,198, 3,223, 51,238, 27,104,199, 75,
|
||||
4,100,224, 14, 52,141,239,129, 28,193,105,248,200, 8, 76,113,
|
||||
5,138,101, 47,225, 36, 15, 33, 53,147,142,218,240, 18,130, 69,
|
||||
29,181,194,125,106, 39,249,185,201,154, 9,120, 77,228,114,166,
|
||||
6,191,139, 98,102,221, 48,253,226,152, 37,179, 16,145, 34,136,
|
||||
54,208,148,206,143,150,219,189,241,210, 19, 92,131, 56, 70, 64,
|
||||
30, 66,182,163,195, 72,126,110,107, 58, 40, 84,250,133,186, 61,
|
||||
202, 94,155,159, 10, 21,121, 43, 78,212,229,172,115,243,167, 87,
|
||||
7,112,192,247,140,128, 99, 13,103, 74,222,237, 49,197,254, 24,
|
||||
227,165,153,119, 38,184,180,124, 17, 68,146,217, 35, 32,137, 46,
|
||||
55, 63,209, 91,149,188,207,205,144,135,151,178,220,252,190, 97,
|
||||
242, 86,211,171, 20, 42, 93,158,132, 60, 57, 83, 71,109, 65,162,
|
||||
31, 45, 67,216,183,123,164,118,196, 23, 73,236,127, 12,111,246,
|
||||
108,161, 59, 82, 41,157, 85,170,251, 96,134,177,187,204, 62, 90,
|
||||
203, 89, 95,176,156,169,160, 81, 11,245, 22,235,122,117, 44,215,
|
||||
79,174,213,233,230,231,173,232,116,214,244,234,168, 80, 88,175,
|
||||
};
|
||||
|
||||
static const uint8_t GF2_8_EXP[] =
|
||||
{
|
||||
1, 2, 4, 8, 16, 32, 64,128, 29, 58,116,232,205,135, 19, 38,
|
||||
76,152, 45, 90,180,117,234,201,143, 3, 6, 12, 24, 48, 96,192,
|
||||
157, 39, 78,156, 37, 74,148, 53,106,212,181,119,238,193,159, 35,
|
||||
70,140, 5, 10, 20, 40, 80,160, 93,186,105,210,185,111,222,161,
|
||||
95,190, 97,194,153, 47, 94,188,101,202,137, 15, 30, 60,120,240,
|
||||
253,231,211,187,107,214,177,127,254,225,223,163, 91,182,113,226,
|
||||
217,175, 67,134, 17, 34, 68,136, 13, 26, 52,104,208,189,103,206,
|
||||
129, 31, 62,124,248,237,199,147, 59,118,236,197,151, 51,102,204,
|
||||
133, 23, 46, 92,184,109,218,169, 79,158, 33, 66,132, 21, 42, 84,
|
||||
168, 77,154, 41, 82,164, 85,170, 73,146, 57,114,228,213,183,115,
|
||||
230,209,191, 99,198,145, 63,126,252,229,215,179,123,246,241,255,
|
||||
227,219,171, 75,150, 49, 98,196,149, 55,110,220,165, 87,174, 65,
|
||||
130, 25, 50,100,200,141, 7, 14, 28, 56,112,224,221,167, 83,166,
|
||||
81,162, 89,178,121,242,249,239,195,155, 43, 86,172, 69,138, 9,
|
||||
18, 36, 72,144, 61,122,244,245,247,243,251,235,203,139, 11, 22,
|
||||
44, 88,176,125,250,233,207,131, 27, 54,108,216,173, 71,142, 1,
|
||||
2, 4, 8, 16, 32, 64,128, 29, 58,116,232,205,135, 19, 38, 76,
|
||||
152, 45, 90,180,117,234,201,143, 3, 6, 12, 24, 48, 96,192,157,
|
||||
39, 78,156, 37, 74,148, 53,106,212,181,119,238,193,159, 35, 70,
|
||||
140, 5, 10, 20, 40, 80,160, 93,186,105,210,185,111,222,161, 95,
|
||||
190, 97,194,153, 47, 94,188,101,202,137, 15, 30, 60,120,240,253,
|
||||
231,211,187,107,214,177,127,254,225,223,163, 91,182,113,226,217,
|
||||
175, 67,134, 17, 34, 68,136, 13, 26, 52,104,208,189,103,206,129,
|
||||
31, 62,124,248,237,199,147, 59,118,236,197,151, 51,102,204,133,
|
||||
23, 46, 92,184,109,218,169, 79,158, 33, 66,132, 21, 42, 84,168,
|
||||
77,154, 41, 82,164, 85,170, 73,146, 57,114,228,213,183,115,230,
|
||||
209,191, 99,198,145, 63,126,252,229,215,179,123,246,241,255,227,
|
||||
219,171, 75,150, 49, 98,196,149, 55,110,220,165, 87,174, 65,130,
|
||||
25, 50,100,200,141, 7, 14, 28, 56,112,224,221,167, 83,166, 81,
|
||||
162, 89,178,121,242,249,239,195,155, 43, 86,172, 69,138, 9, 18,
|
||||
36, 72,144, 61,122,244,245,247,243,251,235,203,139, 11, 22, 44,
|
||||
88,176,125,250,233,207,131, 27, 54,108,216,173, 71,142,};
|
||||
|
||||
static const uint8_t GF2_8_INV[] =
|
||||
{
|
||||
0, 1,142,244, 71,167,122,186,173,157,221,152, 61,170, 93,150,
|
||||
216,114,192, 88,224, 62, 76,102,144,222, 85,128,160,131, 75, 42,
|
||||
108,237, 57, 81, 96, 86, 44,138,112,208, 31, 74, 38,139, 51,110,
|
||||
72,137,111, 46,164,195, 64, 94, 80, 34,207,169,171, 12, 21,225,
|
||||
54, 95,248,213,146, 78,166, 4, 48,136, 43, 30, 22,103, 69,147,
|
||||
56, 35,104,140,129, 26, 37, 97, 19,193,203, 99,151, 14, 55, 65,
|
||||
36, 87,202, 91,185,196, 23, 77, 82,141,239,179, 32,236, 47, 50,
|
||||
40,209, 17,217,233,251,218,121,219,119, 6,187,132,205,254,252,
|
||||
27, 84,161, 29,124,204,228,176, 73, 49, 39, 45, 83,105, 2,245,
|
||||
24,223, 68, 79,155,188, 15, 92, 11,220,189,148,172, 9,199,162,
|
||||
28,130,159,198, 52,194, 70, 5,206, 59, 13, 60,156, 8,190,183,
|
||||
135,229,238,107,235,242,191,175,197,100, 7,123,149,154,174,182,
|
||||
18, 89,165, 53,101,184,163,158,210,247, 98, 90,133,125,168, 58,
|
||||
41,113,200,246,249, 67,215,214, 16,115,118,120,153, 10, 25,145,
|
||||
20, 63,230,240,134,177,226,241,250,116,243,180,109, 33,178,106,
|
||||
227,231,181,234, 3,143,211,201, 66,212,232,117,127,255,126,253,
|
||||
};
|
||||
|
||||
static const uint8_t GF2_8_SHUF_LO[] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
|
||||
0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17,
|
||||
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
|
||||
0, 5, 10, 15, 20, 17, 30, 27, 40, 45, 34, 39, 60, 57, 54, 51,
|
||||
0, 6, 12, 10, 24, 30, 20, 18, 48, 54, 60, 58, 40, 46, 36, 34,
|
||||
0, 7, 14, 9, 28, 27, 18, 21, 56, 63, 54, 49, 36, 35, 42, 45,
|
||||
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96,104,112,120,
|
||||
0, 9, 18, 27, 36, 45, 54, 63, 72, 65, 90, 83,108,101,126,119,
|
||||
0, 10, 20, 30, 40, 34, 60, 54, 80, 90, 68, 78,120,114,108,102,
|
||||
0, 11, 22, 29, 44, 39, 58, 49, 88, 83, 78, 69,116,127, 98,105,
|
||||
0, 12, 24, 20, 48, 60, 40, 36, 96,108,120,116, 80, 92, 72, 68,
|
||||
0, 13, 26, 23, 52, 57, 46, 35,104,101,114,127, 92, 81, 70, 75,
|
||||
0, 14, 28, 18, 56, 54, 36, 42,112,126,108, 98, 72, 70, 84, 90,
|
||||
0, 15, 30, 17, 60, 51, 34, 45,120,119,102,105, 68, 75, 90, 85,
|
||||
0, 16, 32, 48, 64, 80, 96,112,128,144,160,176,192,208,224,240,
|
||||
0, 17, 34, 51, 68, 85,102,119,136,153,170,187,204,221,238,255,
|
||||
0, 18, 36, 54, 72, 90,108,126,144,130,180,166,216,202,252,238,
|
||||
0, 19, 38, 53, 76, 95,106,121,152,139,190,173,212,199,242,225,
|
||||
0, 20, 40, 60, 80, 68,120,108,160,180,136,156,240,228,216,204,
|
||||
0, 21, 42, 63, 84, 65,126,107,168,189,130,151,252,233,214,195,
|
||||
0, 22, 44, 58, 88, 78,116, 98,176,166,156,138,232,254,196,210,
|
||||
0, 23, 46, 57, 92, 75,114,101,184,175,150,129,228,243,202,221,
|
||||
0, 24, 48, 40, 96,120, 80, 72,192,216,240,232,160,184,144,136,
|
||||
0, 25, 50, 43,100,125, 86, 79,200,209,250,227,172,181,158,135,
|
||||
0, 26, 52, 46,104,114, 92, 70,208,202,228,254,184,162,140,150,
|
||||
0, 27, 54, 45,108,119, 90, 65,216,195,238,245,180,175,130,153,
|
||||
0, 28, 56, 36,112,108, 72, 84,224,252,216,196,144,140,168,180,
|
||||
0, 29, 58, 39,116,105, 78, 83,232,245,210,207,156,129,166,187,
|
||||
0, 30, 60, 34,120,102, 68, 90,240,238,204,210,136,150,180,170,
|
||||
0, 31, 62, 33,124, 99, 66, 93,248,231,198,217,132,155,186,165,
|
||||
0, 32, 64, 96,128,160,192,224, 29, 61, 93,125,157,189,221,253,
|
||||
0, 33, 66, 99,132,165,198,231, 21, 52, 87,118,145,176,211,242,
|
||||
0, 34, 68,102,136,170,204,238, 13, 47, 73,107,133,167,193,227,
|
||||
0, 35, 70,101,140,175,202,233, 5, 38, 67, 96,137,170,207,236,
|
||||
0, 36, 72,108,144,180,216,252, 61, 25,117, 81,173,137,229,193,
|
||||
0, 37, 74,111,148,177,222,251, 53, 16,127, 90,161,132,235,206,
|
||||
0, 38, 76,106,152,190,212,242, 45, 11, 97, 71,181,147,249,223,
|
||||
0, 39, 78,105,156,187,210,245, 37, 2,107, 76,185,158,247,208,
|
||||
0, 40, 80,120,160,136,240,216, 93,117, 13, 37,253,213,173,133,
|
||||
0, 41, 82,123,164,141,246,223, 85,124, 7, 46,241,216,163,138,
|
||||
0, 42, 84,126,168,130,252,214, 77,103, 25, 51,229,207,177,155,
|
||||
0, 43, 86,125,172,135,250,209, 69,110, 19, 56,233,194,191,148,
|
||||
0, 44, 88,116,176,156,232,196,125, 81, 37, 9,205,225,149,185,
|
||||
0, 45, 90,119,180,153,238,195,117, 88, 47, 2,193,236,155,182,
|
||||
0, 46, 92,114,184,150,228,202,109, 67, 49, 31,213,251,137,167,
|
||||
0, 47, 94,113,188,147,226,205,101, 74, 59, 20,217,246,135,168,
|
||||
0, 48, 96, 80,192,240,160,144,157,173,253,205, 93,109, 61, 13,
|
||||
0, 49, 98, 83,196,245,166,151,149,164,247,198, 81, 96, 51, 2,
|
||||
0, 50,100, 86,200,250,172,158,141,191,233,219, 69,119, 33, 19,
|
||||
0, 51,102, 85,204,255,170,153,133,182,227,208, 73,122, 47, 28,
|
||||
0, 52,104, 92,208,228,184,140,189,137,213,225,109, 89, 5, 49,
|
||||
0, 53,106, 95,212,225,190,139,181,128,223,234, 97, 84, 11, 62,
|
||||
0, 54,108, 90,216,238,180,130,173,155,193,247,117, 67, 25, 47,
|
||||
0, 55,110, 89,220,235,178,133,165,146,203,252,121, 78, 23, 32,
|
||||
0, 56,112, 72,224,216,144,168,221,229,173,149, 61, 5, 77,117,
|
||||
0, 57,114, 75,228,221,150,175,213,236,167,158, 49, 8, 67,122,
|
||||
0, 58,116, 78,232,210,156,166,205,247,185,131, 37, 31, 81,107,
|
||||
0, 59,118, 77,236,215,154,161,197,254,179,136, 41, 18, 95,100,
|
||||
0, 60,120, 68,240,204,136,180,253,193,133,185, 13, 49,117, 73,
|
||||
0, 61,122, 71,244,201,142,179,245,200,143,178, 1, 60,123, 70,
|
||||
0, 62,124, 66,248,198,132,186,237,211,145,175, 21, 43,105, 87,
|
||||
0, 63,126, 65,252,195,130,189,229,218,155,164, 25, 38,103, 88,
|
||||
0, 64,128,192, 29, 93,157,221, 58,122,186,250, 39,103,167,231,
|
||||
0, 65,130,195, 25, 88,155,218, 50,115,176,241, 43,106,169,232,
|
||||
0, 66,132,198, 21, 87,145,211, 42,104,174,236, 63,125,187,249,
|
||||
0, 67,134,197, 17, 82,151,212, 34, 97,164,231, 51,112,181,246,
|
||||
0, 68,136,204, 13, 73,133,193, 26, 94,146,214, 23, 83,159,219,
|
||||
0, 69,138,207, 9, 76,131,198, 18, 87,152,221, 27, 94,145,212,
|
||||
0, 70,140,202, 5, 67,137,207, 10, 76,134,192, 15, 73,131,197,
|
||||
0, 71,142,201, 1, 70,143,200, 2, 69,140,203, 3, 68,141,202,
|
||||
0, 72,144,216, 61,117,173,229,122, 50,234,162, 71, 15,215,159,
|
||||
0, 73,146,219, 57,112,171,226,114, 59,224,169, 75, 2,217,144,
|
||||
0, 74,148,222, 53,127,161,235,106, 32,254,180, 95, 21,203,129,
|
||||
0, 75,150,221, 49,122,167,236, 98, 41,244,191, 83, 24,197,142,
|
||||
0, 76,152,212, 45, 97,181,249, 90, 22,194,142,119, 59,239,163,
|
||||
0, 77,154,215, 41,100,179,254, 82, 31,200,133,123, 54,225,172,
|
||||
0, 78,156,210, 37,107,185,247, 74, 4,214,152,111, 33,243,189,
|
||||
0, 79,158,209, 33,110,191,240, 66, 13,220,147, 99, 44,253,178,
|
||||
0, 80,160,240, 93, 13,253,173,186,234, 26, 74,231,183, 71, 23,
|
||||
0, 81,162,243, 89, 8,251,170,178,227, 16, 65,235,186, 73, 24,
|
||||
0, 82,164,246, 85, 7,241,163,170,248, 14, 92,255,173, 91, 9,
|
||||
0, 83,166,245, 81, 2,247,164,162,241, 4, 87,243,160, 85, 6,
|
||||
0, 84,168,252, 77, 25,229,177,154,206, 50,102,215,131,127, 43,
|
||||
0, 85,170,255, 73, 28,227,182,146,199, 56,109,219,142,113, 36,
|
||||
0, 86,172,250, 69, 19,233,191,138,220, 38,112,207,153, 99, 53,
|
||||
0, 87,174,249, 65, 22,239,184,130,213, 44,123,195,148,109, 58,
|
||||
0, 88,176,232,125, 37,205,149,250,162, 74, 18,135,223, 55,111,
|
||||
0, 89,178,235,121, 32,203,146,242,171, 64, 25,139,210, 57, 96,
|
||||
0, 90,180,238,117, 47,193,155,234,176, 94, 4,159,197, 43,113,
|
||||
0, 91,182,237,113, 42,199,156,226,185, 84, 15,147,200, 37,126,
|
||||
0, 92,184,228,109, 49,213,137,218,134, 98, 62,183,235, 15, 83,
|
||||
0, 93,186,231,105, 52,211,142,210,143,104, 53,187,230, 1, 92,
|
||||
0, 94,188,226,101, 59,217,135,202,148,118, 40,175,241, 19, 77,
|
||||
0, 95,190,225, 97, 62,223,128,194,157,124, 35,163,252, 29, 66,
|
||||
0, 96,192,160,157,253, 93, 61, 39, 71,231,135,186,218,122, 26,
|
||||
0, 97,194,163,153,248, 91, 58, 47, 78,237,140,182,215,116, 21,
|
||||
0, 98,196,166,149,247, 81, 51, 55, 85,243,145,162,192,102, 4,
|
||||
0, 99,198,165,145,242, 87, 52, 63, 92,249,154,174,205,104, 11,
|
||||
0,100,200,172,141,233, 69, 33, 7, 99,207,171,138,238, 66, 38,
|
||||
0,101,202,175,137,236, 67, 38, 15,106,197,160,134,227, 76, 41,
|
||||
0,102,204,170,133,227, 73, 47, 23,113,219,189,146,244, 94, 56,
|
||||
0,103,206,169,129,230, 79, 40, 31,120,209,182,158,249, 80, 55,
|
||||
0,104,208,184,189,213,109, 5,103, 15,183,223,218,178, 10, 98,
|
||||
0,105,210,187,185,208,107, 2,111, 6,189,212,214,191, 4,109,
|
||||
0,106,212,190,181,223, 97, 11,119, 29,163,201,194,168, 22,124,
|
||||
0,107,214,189,177,218,103, 12,127, 20,169,194,206,165, 24,115,
|
||||
0,108,216,180,173,193,117, 25, 71, 43,159,243,234,134, 50, 94,
|
||||
0,109,218,183,169,196,115, 30, 79, 34,149,248,230,139, 60, 81,
|
||||
0,110,220,178,165,203,121, 23, 87, 57,139,229,242,156, 46, 64,
|
||||
0,111,222,177,161,206,127, 16, 95, 48,129,238,254,145, 32, 79,
|
||||
0,112,224,144,221,173, 61, 77,167,215, 71, 55,122, 10,154,234,
|
||||
0,113,226,147,217,168, 59, 74,175,222, 77, 60,118, 7,148,229,
|
||||
0,114,228,150,213,167, 49, 67,183,197, 83, 33, 98, 16,134,244,
|
||||
0,115,230,149,209,162, 55, 68,191,204, 89, 42,110, 29,136,251,
|
||||
0,116,232,156,205,185, 37, 81,135,243,111, 27, 74, 62,162,214,
|
||||
0,117,234,159,201,188, 35, 86,143,250,101, 16, 70, 51,172,217,
|
||||
0,118,236,154,197,179, 41, 95,151,225,123, 13, 82, 36,190,200,
|
||||
0,119,238,153,193,182, 47, 88,159,232,113, 6, 94, 41,176,199,
|
||||
0,120,240,136,253,133, 13,117,231,159, 23,111, 26, 98,234,146,
|
||||
0,121,242,139,249,128, 11,114,239,150, 29,100, 22,111,228,157,
|
||||
0,122,244,142,245,143, 1,123,247,141, 3,121, 2,120,246,140,
|
||||
0,123,246,141,241,138, 7,124,255,132, 9,114, 14,117,248,131,
|
||||
0,124,248,132,237,145, 21,105,199,187, 63, 67, 42, 86,210,174,
|
||||
0,125,250,135,233,148, 19,110,207,178, 53, 72, 38, 91,220,161,
|
||||
0,126,252,130,229,155, 25,103,215,169, 43, 85, 50, 76,206,176,
|
||||
0,127,254,129,225,158, 31, 96,223,160, 33, 94, 62, 65,192,191,
|
||||
0,128, 29,157, 58,186, 39,167,116,244,105,233, 78,206, 83,211,
|
||||
0,129, 31,158, 62,191, 33,160,124,253, 99,226, 66,195, 93,220,
|
||||
0,130, 25,155, 50,176, 43,169,100,230,125,255, 86,212, 79,205,
|
||||
0,131, 27,152, 54,181, 45,174,108,239,119,244, 90,217, 65,194,
|
||||
0,132, 21,145, 42,174, 63,187, 84,208, 65,197,126,250,107,239,
|
||||
0,133, 23,146, 46,171, 57,188, 92,217, 75,206,114,247,101,224,
|
||||
0,134, 17,151, 34,164, 51,181, 68,194, 85,211,102,224,119,241,
|
||||
0,135, 19,148, 38,161, 53,178, 76,203, 95,216,106,237,121,254,
|
||||
0,136, 13,133, 26,146, 23,159, 52,188, 57,177, 46,166, 35,171,
|
||||
0,137, 15,134, 30,151, 17,152, 60,181, 51,186, 34,171, 45,164,
|
||||
0,138, 9,131, 18,152, 27,145, 36,174, 45,167, 54,188, 63,181,
|
||||
0,139, 11,128, 22,157, 29,150, 44,167, 39,172, 58,177, 49,186,
|
||||
0,140, 5,137, 10,134, 15,131, 20,152, 17,157, 30,146, 27,151,
|
||||
0,141, 7,138, 14,131, 9,132, 28,145, 27,150, 18,159, 21,152,
|
||||
0,142, 1,143, 2,140, 3,141, 4,138, 5,139, 6,136, 7,137,
|
||||
0,143, 3,140, 6,137, 5,138, 12,131, 15,128, 10,133, 9,134,
|
||||
0,144, 61,173,122,234, 71,215,244,100,201, 89,142, 30,179, 35,
|
||||
0,145, 63,174,126,239, 65,208,252,109,195, 82,130, 19,189, 44,
|
||||
0,146, 57,171,114,224, 75,217,228,118,221, 79,150, 4,175, 61,
|
||||
0,147, 59,168,118,229, 77,222,236,127,215, 68,154, 9,161, 50,
|
||||
0,148, 53,161,106,254, 95,203,212, 64,225,117,190, 42,139, 31,
|
||||
0,149, 55,162,110,251, 89,204,220, 73,235,126,178, 39,133, 16,
|
||||
0,150, 49,167, 98,244, 83,197,196, 82,245, 99,166, 48,151, 1,
|
||||
0,151, 51,164,102,241, 85,194,204, 91,255,104,170, 61,153, 14,
|
||||
0,152, 45,181, 90,194,119,239,180, 44,153, 1,238,118,195, 91,
|
||||
0,153, 47,182, 94,199,113,232,188, 37,147, 10,226,123,205, 84,
|
||||
0,154, 41,179, 82,200,123,225,164, 62,141, 23,246,108,223, 69,
|
||||
0,155, 43,176, 86,205,125,230,172, 55,135, 28,250, 97,209, 74,
|
||||
0,156, 37,185, 74,214,111,243,148, 8,177, 45,222, 66,251,103,
|
||||
0,157, 39,186, 78,211,105,244,156, 1,187, 38,210, 79,245,104,
|
||||
0,158, 33,191, 66,220, 99,253,132, 26,165, 59,198, 88,231,121,
|
||||
0,159, 35,188, 70,217,101,250,140, 19,175, 48,202, 85,233,118,
|
||||
0,160, 93,253,186, 26,231, 71,105,201, 52,148,211,115,142, 46,
|
||||
0,161, 95,254,190, 31,225, 64, 97,192, 62,159,223,126,128, 33,
|
||||
0,162, 89,251,178, 16,235, 73,121,219, 32,130,203,105,146, 48,
|
||||
0,163, 91,248,182, 21,237, 78,113,210, 42,137,199,100,156, 63,
|
||||
0,164, 85,241,170, 14,255, 91, 73,237, 28,184,227, 71,182, 18,
|
||||
0,165, 87,242,174, 11,249, 92, 65,228, 22,179,239, 74,184, 29,
|
||||
0,166, 81,247,162, 4,243, 85, 89,255, 8,174,251, 93,170, 12,
|
||||
0,167, 83,244,166, 1,245, 82, 81,246, 2,165,247, 80,164, 3,
|
||||
0,168, 77,229,154, 50,215,127, 41,129,100,204,179, 27,254, 86,
|
||||
0,169, 79,230,158, 55,209,120, 33,136,110,199,191, 22,240, 89,
|
||||
0,170, 73,227,146, 56,219,113, 57,147,112,218,171, 1,226, 72,
|
||||
0,171, 75,224,150, 61,221,118, 49,154,122,209,167, 12,236, 71,
|
||||
0,172, 69,233,138, 38,207, 99, 9,165, 76,224,131, 47,198,106,
|
||||
0,173, 71,234,142, 35,201,100, 1,172, 70,235,143, 34,200,101,
|
||||
0,174, 65,239,130, 44,195,109, 25,183, 88,246,155, 53,218,116,
|
||||
0,175, 67,236,134, 41,197,106, 17,190, 82,253,151, 56,212,123,
|
||||
0,176,125,205,250, 74,135, 55,233, 89,148, 36, 19,163,110,222,
|
||||
0,177,127,206,254, 79,129, 48,225, 80,158, 47, 31,174, 96,209,
|
||||
0,178,121,203,242, 64,139, 57,249, 75,128, 50, 11,185,114,192,
|
||||
0,179,123,200,246, 69,141, 62,241, 66,138, 57, 7,180,124,207,
|
||||
0,180,117,193,234, 94,159, 43,201,125,188, 8, 35,151, 86,226,
|
||||
0,181,119,194,238, 91,153, 44,193,116,182, 3, 47,154, 88,237,
|
||||
0,182,113,199,226, 84,147, 37,217,111,168, 30, 59,141, 74,252,
|
||||
0,183,115,196,230, 81,149, 34,209,102,162, 21, 55,128, 68,243,
|
||||
0,184,109,213,218, 98,183, 15,169, 17,196,124,115,203, 30,166,
|
||||
0,185,111,214,222,103,177, 8,161, 24,206,119,127,198, 16,169,
|
||||
0,186,105,211,210,104,187, 1,185, 3,208,106,107,209, 2,184,
|
||||
0,187,107,208,214,109,189, 6,177, 10,218, 97,103,220, 12,183,
|
||||
0,188,101,217,202,118,175, 19,137, 53,236, 80, 67,255, 38,154,
|
||||
0,189,103,218,206,115,169, 20,129, 60,230, 91, 79,242, 40,149,
|
||||
0,190, 97,223,194,124,163, 29,153, 39,248, 70, 91,229, 58,132,
|
||||
0,191, 99,220,198,121,165, 26,145, 46,242, 77, 87,232, 52,139,
|
||||
0,192,157, 93, 39,231,186,122, 78,142,211, 19,105,169,244, 52,
|
||||
0,193,159, 94, 35,226,188,125, 70,135,217, 24,101,164,250, 59,
|
||||
0,194,153, 91, 47,237,182,116, 94,156,199, 5,113,179,232, 42,
|
||||
0,195,155, 88, 43,232,176,115, 86,149,205, 14,125,190,230, 37,
|
||||
0,196,149, 81, 55,243,162,102,110,170,251, 63, 89,157,204, 8,
|
||||
0,197,151, 82, 51,246,164, 97,102,163,241, 52, 85,144,194, 7,
|
||||
0,198,145, 87, 63,249,174,104,126,184,239, 41, 65,135,208, 22,
|
||||
0,199,147, 84, 59,252,168,111,118,177,229, 34, 77,138,222, 25,
|
||||
0,200,141, 69, 7,207,138, 66, 14,198,131, 75, 9,193,132, 76,
|
||||
0,201,143, 70, 3,202,140, 69, 6,207,137, 64, 5,204,138, 67,
|
||||
0,202,137, 67, 15,197,134, 76, 30,212,151, 93, 17,219,152, 82,
|
||||
0,203,139, 64, 11,192,128, 75, 22,221,157, 86, 29,214,150, 93,
|
||||
0,204,133, 73, 23,219,146, 94, 46,226,171,103, 57,245,188,112,
|
||||
0,205,135, 74, 19,222,148, 89, 38,235,161,108, 53,248,178,127,
|
||||
0,206,129, 79, 31,209,158, 80, 62,240,191,113, 33,239,160,110,
|
||||
0,207,131, 76, 27,212,152, 87, 54,249,181,122, 45,226,174, 97,
|
||||
0,208,189,109,103,183,218, 10,206, 30,115,163,169,121, 20,196,
|
||||
0,209,191,110, 99,178,220, 13,198, 23,121,168,165,116, 26,203,
|
||||
0,210,185,107,111,189,214, 4,222, 12,103,181,177, 99, 8,218,
|
||||
0,211,187,104,107,184,208, 3,214, 5,109,190,189,110, 6,213,
|
||||
0,212,181, 97,119,163,194, 22,238, 58, 91,143,153, 77, 44,248,
|
||||
0,213,183, 98,115,166,196, 17,230, 51, 81,132,149, 64, 34,247,
|
||||
0,214,177,103,127,169,206, 24,254, 40, 79,153,129, 87, 48,230,
|
||||
0,215,179,100,123,172,200, 31,246, 33, 69,146,141, 90, 62,233,
|
||||
0,216,173,117, 71,159,234, 50,142, 86, 35,251,201, 17,100,188,
|
||||
0,217,175,118, 67,154,236, 53,134, 95, 41,240,197, 28,106,179,
|
||||
0,218,169,115, 79,149,230, 60,158, 68, 55,237,209, 11,120,162,
|
||||
0,219,171,112, 75,144,224, 59,150, 77, 61,230,221, 6,118,173,
|
||||
0,220,165,121, 87,139,242, 46,174,114, 11,215,249, 37, 92,128,
|
||||
0,221,167,122, 83,142,244, 41,166,123, 1,220,245, 40, 82,143,
|
||||
0,222,161,127, 95,129,254, 32,190, 96, 31,193,225, 63, 64,158,
|
||||
0,223,163,124, 91,132,248, 39,182,105, 21,202,237, 50, 78,145,
|
||||
0,224,221, 61,167, 71,122,154, 83,179,142,110,244, 20, 41,201,
|
||||
0,225,223, 62,163, 66,124,157, 91,186,132,101,248, 25, 39,198,
|
||||
0,226,217, 59,175, 77,118,148, 67,161,154,120,236, 14, 53,215,
|
||||
0,227,219, 56,171, 72,112,147, 75,168,144,115,224, 3, 59,216,
|
||||
0,228,213, 49,183, 83, 98,134,115,151,166, 66,196, 32, 17,245,
|
||||
0,229,215, 50,179, 86,100,129,123,158,172, 73,200, 45, 31,250,
|
||||
0,230,209, 55,191, 89,110,136, 99,133,178, 84,220, 58, 13,235,
|
||||
0,231,211, 52,187, 92,104,143,107,140,184, 95,208, 55, 3,228,
|
||||
0,232,205, 37,135,111, 74,162, 19,251,222, 54,148,124, 89,177,
|
||||
0,233,207, 38,131,106, 76,165, 27,242,212, 61,152,113, 87,190,
|
||||
0,234,201, 35,143,101, 70,172, 3,233,202, 32,140,102, 69,175,
|
||||
0,235,203, 32,139, 96, 64,171, 11,224,192, 43,128,107, 75,160,
|
||||
0,236,197, 41,151,123, 82,190, 51,223,246, 26,164, 72, 97,141,
|
||||
0,237,199, 42,147,126, 84,185, 59,214,252, 17,168, 69,111,130,
|
||||
0,238,193, 47,159,113, 94,176, 35,205,226, 12,188, 82,125,147,
|
||||
0,239,195, 44,155,116, 88,183, 43,196,232, 7,176, 95,115,156,
|
||||
0,240,253, 13,231, 23, 26,234,211, 35, 46,222, 52,196,201, 57,
|
||||
0,241,255, 14,227, 18, 28,237,219, 42, 36,213, 56,201,199, 54,
|
||||
0,242,249, 11,239, 29, 22,228,195, 49, 58,200, 44,222,213, 39,
|
||||
0,243,251, 8,235, 24, 16,227,203, 56, 48,195, 32,211,219, 40,
|
||||
0,244,245, 1,247, 3, 2,246,243, 7, 6,242, 4,240,241, 5,
|
||||
0,245,247, 2,243, 6, 4,241,251, 14, 12,249, 8,253,255, 10,
|
||||
0,246,241, 7,255, 9, 14,248,227, 21, 18,228, 28,234,237, 27,
|
||||
0,247,243, 4,251, 12, 8,255,235, 28, 24,239, 16,231,227, 20,
|
||||
0,248,237, 21,199, 63, 42,210,147,107,126,134, 84,172,185, 65,
|
||||
0,249,239, 22,195, 58, 44,213,155, 98,116,141, 88,161,183, 78,
|
||||
0,250,233, 19,207, 53, 38,220,131,121,106,144, 76,182,165, 95,
|
||||
0,251,235, 16,203, 48, 32,219,139,112, 96,155, 64,187,171, 80,
|
||||
0,252,229, 25,215, 43, 50,206,179, 79, 86,170,100,152,129,125,
|
||||
0,253,231, 26,211, 46, 52,201,187, 70, 92,161,104,149,143,114,
|
||||
0,254,225, 31,223, 33, 62,192,163, 93, 66,188,124,130,157, 99,
|
||||
0,255,227, 28,219, 36, 56,199,171, 84, 72,183,112,143,147,108,
|
||||
};
|
||||
|
||||
static const uint8_t GF2_8_SHUF_HI[] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 16, 32, 48, 64, 80, 96,112,128,144,160,176,192,208,224,240,
|
||||
0, 32, 64, 96,128,160,192,224, 29, 61, 93,125,157,189,221,253,
|
||||
0, 48, 96, 80,192,240,160,144,157,173,253,205, 93,109, 61, 13,
|
||||
0, 64,128,192, 29, 93,157,221, 58,122,186,250, 39,103,167,231,
|
||||
0, 80,160,240, 93, 13,253,173,186,234, 26, 74,231,183, 71, 23,
|
||||
0, 96,192,160,157,253, 93, 61, 39, 71,231,135,186,218,122, 26,
|
||||
0,112,224,144,221,173, 61, 77,167,215, 71, 55,122, 10,154,234,
|
||||
0,128, 29,157, 58,186, 39,167,116,244,105,233, 78,206, 83,211,
|
||||
0,144, 61,173,122,234, 71,215,244,100,201, 89,142, 30,179, 35,
|
||||
0,160, 93,253,186, 26,231, 71,105,201, 52,148,211,115,142, 46,
|
||||
0,176,125,205,250, 74,135, 55,233, 89,148, 36, 19,163,110,222,
|
||||
0,192,157, 93, 39,231,186,122, 78,142,211, 19,105,169,244, 52,
|
||||
0,208,189,109,103,183,218, 10,206, 30,115,163,169,121, 20,196,
|
||||
0,224,221, 61,167, 71,122,154, 83,179,142,110,244, 20, 41,201,
|
||||
0,240,253, 13,231, 23, 26,234,211, 35, 46,222, 52,196,201, 57,
|
||||
0, 29, 58, 39,116,105, 78, 83,232,245,210,207,156,129,166,187,
|
||||
0, 13, 26, 23, 52, 57, 46, 35,104,101,114,127, 92, 81, 70, 75,
|
||||
0, 61,122, 71,244,201,142,179,245,200,143,178, 1, 60,123, 70,
|
||||
0, 45, 90,119,180,153,238,195,117, 88, 47, 2,193,236,155,182,
|
||||
0, 93,186,231,105, 52,211,142,210,143,104, 53,187,230, 1, 92,
|
||||
0, 77,154,215, 41,100,179,254, 82, 31,200,133,123, 54,225,172,
|
||||
0,125,250,135,233,148, 19,110,207,178, 53, 72, 38, 91,220,161,
|
||||
0,109,218,183,169,196,115, 30, 79, 34,149,248,230,139, 60, 81,
|
||||
0,157, 39,186, 78,211,105,244,156, 1,187, 38,210, 79,245,104,
|
||||
0,141, 7,138, 14,131, 9,132, 28,145, 27,150, 18,159, 21,152,
|
||||
0,189,103,218,206,115,169, 20,129, 60,230, 91, 79,242, 40,149,
|
||||
0,173, 71,234,142, 35,201,100, 1,172, 70,235,143, 34,200,101,
|
||||
0,221,167,122, 83,142,244, 41,166,123, 1,220,245, 40, 82,143,
|
||||
0,205,135, 74, 19,222,148, 89, 38,235,161,108, 53,248,178,127,
|
||||
0,253,231, 26,211, 46, 52,201,187, 70, 92,161,104,149,143,114,
|
||||
0,237,199, 42,147,126, 84,185, 59,214,252, 17,168, 69,111,130,
|
||||
0, 58,116, 78,232,210,156,166,205,247,185,131, 37, 31, 81,107,
|
||||
0, 42, 84,126,168,130,252,214, 77,103, 25, 51,229,207,177,155,
|
||||
0, 26, 52, 46,104,114, 92, 70,208,202,228,254,184,162,140,150,
|
||||
0, 10, 20, 30, 40, 34, 60, 54, 80, 90, 68, 78,120,114,108,102,
|
||||
0,122,244,142,245,143, 1,123,247,141, 3,121, 2,120,246,140,
|
||||
0,106,212,190,181,223, 97, 11,119, 29,163,201,194,168, 22,124,
|
||||
0, 90,180,238,117, 47,193,155,234,176, 94, 4,159,197, 43,113,
|
||||
0, 74,148,222, 53,127,161,235,106, 32,254,180, 95, 21,203,129,
|
||||
0,186,105,211,210,104,187, 1,185, 3,208,106,107,209, 2,184,
|
||||
0,170, 73,227,146, 56,219,113, 57,147,112,218,171, 1,226, 72,
|
||||
0,154, 41,179, 82,200,123,225,164, 62,141, 23,246,108,223, 69,
|
||||
0,138, 9,131, 18,152, 27,145, 36,174, 45,167, 54,188, 63,181,
|
||||
0,250,233, 19,207, 53, 38,220,131,121,106,144, 76,182,165, 95,
|
||||
0,234,201, 35,143,101, 70,172, 3,233,202, 32,140,102, 69,175,
|
||||
0,218,169,115, 79,149,230, 60,158, 68, 55,237,209, 11,120,162,
|
||||
0,202,137, 67, 15,197,134, 76, 30,212,151, 93, 17,219,152, 82,
|
||||
0, 39, 78,105,156,187,210,245, 37, 2,107, 76,185,158,247,208,
|
||||
0, 55,110, 89,220,235,178,133,165,146,203,252,121, 78, 23, 32,
|
||||
0, 7, 14, 9, 28, 27, 18, 21, 56, 63, 54, 49, 36, 35, 42, 45,
|
||||
0, 23, 46, 57, 92, 75,114,101,184,175,150,129,228,243,202,221,
|
||||
0,103,206,169,129,230, 79, 40, 31,120,209,182,158,249, 80, 55,
|
||||
0,119,238,153,193,182, 47, 88,159,232,113, 6, 94, 41,176,199,
|
||||
0, 71,142,201, 1, 70,143,200, 2, 69,140,203, 3, 68,141,202,
|
||||
0, 87,174,249, 65, 22,239,184,130,213, 44,123,195,148,109, 58,
|
||||
0,167, 83,244,166, 1,245, 82, 81,246, 2,165,247, 80,164, 3,
|
||||
0,183,115,196,230, 81,149, 34,209,102,162, 21, 55,128, 68,243,
|
||||
0,135, 19,148, 38,161, 53,178, 76,203, 95,216,106,237,121,254,
|
||||
0,151, 51,164,102,241, 85,194,204, 91,255,104,170, 61,153, 14,
|
||||
0,231,211, 52,187, 92,104,143,107,140,184, 95,208, 55, 3,228,
|
||||
0,247,243, 4,251, 12, 8,255,235, 28, 24,239, 16,231,227, 20,
|
||||
0,199,147, 84, 59,252,168,111,118,177,229, 34, 77,138,222, 25,
|
||||
0,215,179,100,123,172,200, 31,246, 33, 69,146,141, 90, 62,233,
|
||||
0,116,232,156,205,185, 37, 81,135,243,111, 27, 74, 62,162,214,
|
||||
0,100,200,172,141,233, 69, 33, 7, 99,207,171,138,238, 66, 38,
|
||||
0, 84,168,252, 77, 25,229,177,154,206, 50,102,215,131,127, 43,
|
||||
0, 68,136,204, 13, 73,133,193, 26, 94,146,214, 23, 83,159,219,
|
||||
0, 52,104, 92,208,228,184,140,189,137,213,225,109, 89, 5, 49,
|
||||
0, 36, 72,108,144,180,216,252, 61, 25,117, 81,173,137,229,193,
|
||||
0, 20, 40, 60, 80, 68,120,108,160,180,136,156,240,228,216,204,
|
||||
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
|
||||
0,244,245, 1,247, 3, 2,246,243, 7, 6,242, 4,240,241, 5,
|
||||
0,228,213, 49,183, 83, 98,134,115,151,166, 66,196, 32, 17,245,
|
||||
0,212,181, 97,119,163,194, 22,238, 58, 91,143,153, 77, 44,248,
|
||||
0,196,149, 81, 55,243,162,102,110,170,251, 63, 89,157,204, 8,
|
||||
0,180,117,193,234, 94,159, 43,201,125,188, 8, 35,151, 86,226,
|
||||
0,164, 85,241,170, 14,255, 91, 73,237, 28,184,227, 71,182, 18,
|
||||
0,148, 53,161,106,254, 95,203,212, 64,225,117,190, 42,139, 31,
|
||||
0,132, 21,145, 42,174, 63,187, 84,208, 65,197,126,250,107,239,
|
||||
0,105,210,187,185,208,107, 2,111, 6,189,212,214,191, 4,109,
|
||||
0,121,242,139,249,128, 11,114,239,150, 29,100, 22,111,228,157,
|
||||
0, 73,146,219, 57,112,171,226,114, 59,224,169, 75, 2,217,144,
|
||||
0, 89,178,235,121, 32,203,146,242,171, 64, 25,139,210, 57, 96,
|
||||
0, 41, 82,123,164,141,246,223, 85,124, 7, 46,241,216,163,138,
|
||||
0, 57,114, 75,228,221,150,175,213,236,167,158, 49, 8, 67,122,
|
||||
0, 9, 18, 27, 36, 45, 54, 63, 72, 65, 90, 83,108,101,126,119,
|
||||
0, 25, 50, 43,100,125, 86, 79,200,209,250,227,172,181,158,135,
|
||||
0,233,207, 38,131,106, 76,165, 27,242,212, 61,152,113, 87,190,
|
||||
0,249,239, 22,195, 58, 44,213,155, 98,116,141, 88,161,183, 78,
|
||||
0,201,143, 70, 3,202,140, 69, 6,207,137, 64, 5,204,138, 67,
|
||||
0,217,175,118, 67,154,236, 53,134, 95, 41,240,197, 28,106,179,
|
||||
0,169, 79,230,158, 55,209,120, 33,136,110,199,191, 22,240, 89,
|
||||
0,185,111,214,222,103,177, 8,161, 24,206,119,127,198, 16,169,
|
||||
0,137, 15,134, 30,151, 17,152, 60,181, 51,186, 34,171, 45,164,
|
||||
0,153, 47,182, 94,199,113,232,188, 37,147, 10,226,123,205, 84,
|
||||
0, 78,156,210, 37,107,185,247, 74, 4,214,152,111, 33,243,189,
|
||||
0, 94,188,226,101, 59,217,135,202,148,118, 40,175,241, 19, 77,
|
||||
0,110,220,178,165,203,121, 23, 87, 57,139,229,242,156, 46, 64,
|
||||
0,126,252,130,229,155, 25,103,215,169, 43, 85, 50, 76,206,176,
|
||||
0, 14, 28, 18, 56, 54, 36, 42,112,126,108, 98, 72, 70, 84, 90,
|
||||
0, 30, 60, 34,120,102, 68, 90,240,238,204,210,136,150,180,170,
|
||||
0, 46, 92,114,184,150,228,202,109, 67, 49, 31,213,251,137,167,
|
||||
0, 62,124, 66,248,198,132,186,237,211,145,175, 21, 43,105, 87,
|
||||
0,206,129, 79, 31,209,158, 80, 62,240,191,113, 33,239,160,110,
|
||||
0,222,161,127, 95,129,254, 32,190, 96, 31,193,225, 63, 64,158,
|
||||
0,238,193, 47,159,113, 94,176, 35,205,226, 12,188, 82,125,147,
|
||||
0,254,225, 31,223, 33, 62,192,163, 93, 66,188,124,130,157, 99,
|
||||
0,142, 1,143, 2,140, 3,141, 4,138, 5,139, 6,136, 7,137,
|
||||
0,158, 33,191, 66,220, 99,253,132, 26,165, 59,198, 88,231,121,
|
||||
0,174, 65,239,130, 44,195,109, 25,183, 88,246,155, 53,218,116,
|
||||
0,190, 97,223,194,124,163, 29,153, 39,248, 70, 91,229, 58,132,
|
||||
0, 83,166,245, 81, 2,247,164,162,241, 4, 87,243,160, 85, 6,
|
||||
0, 67,134,197, 17, 82,151,212, 34, 97,164,231, 51,112,181,246,
|
||||
0,115,230,149,209,162, 55, 68,191,204, 89, 42,110, 29,136,251,
|
||||
0, 99,198,165,145,242, 87, 52, 63, 92,249,154,174,205,104, 11,
|
||||
0, 19, 38, 53, 76, 95,106,121,152,139,190,173,212,199,242,225,
|
||||
0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17,
|
||||
0, 51,102, 85,204,255,170,153,133,182,227,208, 73,122, 47, 28,
|
||||
0, 35, 70,101,140,175,202,233, 5, 38, 67, 96,137,170,207,236,
|
||||
0,211,187,104,107,184,208, 3,214, 5,109,190,189,110, 6,213,
|
||||
0,195,155, 88, 43,232,176,115, 86,149,205, 14,125,190,230, 37,
|
||||
0,243,251, 8,235, 24, 16,227,203, 56, 48,195, 32,211,219, 40,
|
||||
0,227,219, 56,171, 72,112,147, 75,168,144,115,224, 3, 59,216,
|
||||
0,147, 59,168,118,229, 77,222,236,127,215, 68,154, 9,161, 50,
|
||||
0,131, 27,152, 54,181, 45,174,108,239,119,244, 90,217, 65,194,
|
||||
0,179,123,200,246, 69,141, 62,241, 66,138, 57, 7,180,124,207,
|
||||
0,163, 91,248,182, 21,237, 78,113,210, 42,137,199,100,156, 63,
|
||||
0,232,205, 37,135,111, 74,162, 19,251,222, 54,148,124, 89,177,
|
||||
0,248,237, 21,199, 63, 42,210,147,107,126,134, 84,172,185, 65,
|
||||
0,200,141, 69, 7,207,138, 66, 14,198,131, 75, 9,193,132, 76,
|
||||
0,216,173,117, 71,159,234, 50,142, 86, 35,251,201, 17,100,188,
|
||||
0,168, 77,229,154, 50,215,127, 41,129,100,204,179, 27,254, 86,
|
||||
0,184,109,213,218, 98,183, 15,169, 17,196,124,115,203, 30,166,
|
||||
0,136, 13,133, 26,146, 23,159, 52,188, 57,177, 46,166, 35,171,
|
||||
0,152, 45,181, 90,194,119,239,180, 44,153, 1,238,118,195, 91,
|
||||
0,104,208,184,189,213,109, 5,103, 15,183,223,218,178, 10, 98,
|
||||
0,120,240,136,253,133, 13,117,231,159, 23,111, 26, 98,234,146,
|
||||
0, 72,144,216, 61,117,173,229,122, 50,234,162, 71, 15,215,159,
|
||||
0, 88,176,232,125, 37,205,149,250,162, 74, 18,135,223, 55,111,
|
||||
0, 40, 80,120,160,136,240,216, 93,117, 13, 37,253,213,173,133,
|
||||
0, 56,112, 72,224,216,144,168,221,229,173,149, 61, 5, 77,117,
|
||||
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96,104,112,120,
|
||||
0, 24, 48, 40, 96,120, 80, 72,192,216,240,232,160,184,144,136,
|
||||
0,245,247, 2,243, 6, 4,241,251, 14, 12,249, 8,253,255, 10,
|
||||
0,229,215, 50,179, 86,100,129,123,158,172, 73,200, 45, 31,250,
|
||||
0,213,183, 98,115,166,196, 17,230, 51, 81,132,149, 64, 34,247,
|
||||
0,197,151, 82, 51,246,164, 97,102,163,241, 52, 85,144,194, 7,
|
||||
0,181,119,194,238, 91,153, 44,193,116,182, 3, 47,154, 88,237,
|
||||
0,165, 87,242,174, 11,249, 92, 65,228, 22,179,239, 74,184, 29,
|
||||
0,149, 55,162,110,251, 89,204,220, 73,235,126,178, 39,133, 16,
|
||||
0,133, 23,146, 46,171, 57,188, 92,217, 75,206,114,247,101,224,
|
||||
0,117,234,159,201,188, 35, 86,143,250,101, 16, 70, 51,172,217,
|
||||
0,101,202,175,137,236, 67, 38, 15,106,197,160,134,227, 76, 41,
|
||||
0, 85,170,255, 73, 28,227,182,146,199, 56,109,219,142,113, 36,
|
||||
0, 69,138,207, 9, 76,131,198, 18, 87,152,221, 27, 94,145,212,
|
||||
0, 53,106, 95,212,225,190,139,181,128,223,234, 97, 84, 11, 62,
|
||||
0, 37, 74,111,148,177,222,251, 53, 16,127, 90,161,132,235,206,
|
||||
0, 21, 42, 63, 84, 65,126,107,168,189,130,151,252,233,214,195,
|
||||
0, 5, 10, 15, 20, 17, 30, 27, 40, 45, 34, 39, 60, 57, 54, 51,
|
||||
0,210,185,107,111,189,214, 4,222, 12,103,181,177, 99, 8,218,
|
||||
0,194,153, 91, 47,237,182,116, 94,156,199, 5,113,179,232, 42,
|
||||
0,242,249, 11,239, 29, 22,228,195, 49, 58,200, 44,222,213, 39,
|
||||
0,226,217, 59,175, 77,118,148, 67,161,154,120,236, 14, 53,215,
|
||||
0,146, 57,171,114,224, 75,217,228,118,221, 79,150, 4,175, 61,
|
||||
0,130, 25,155, 50,176, 43,169,100,230,125,255, 86,212, 79,205,
|
||||
0,178,121,203,242, 64,139, 57,249, 75,128, 50, 11,185,114,192,
|
||||
0,162, 89,251,178, 16,235, 73,121,219, 32,130,203,105,146, 48,
|
||||
0, 82,164,246, 85, 7,241,163,170,248, 14, 92,255,173, 91, 9,
|
||||
0, 66,132,198, 21, 87,145,211, 42,104,174,236, 63,125,187,249,
|
||||
0,114,228,150,213,167, 49, 67,183,197, 83, 33, 98, 16,134,244,
|
||||
0, 98,196,166,149,247, 81, 51, 55, 85,243,145,162,192,102, 4,
|
||||
0, 18, 36, 54, 72, 90,108,126,144,130,180,166,216,202,252,238,
|
||||
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
|
||||
0, 50,100, 86,200,250,172,158,141,191,233,219, 69,119, 33, 19,
|
||||
0, 34, 68,102,136,170,204,238, 13, 47, 73,107,133,167,193,227,
|
||||
0,207,131, 76, 27,212,152, 87, 54,249,181,122, 45,226,174, 97,
|
||||
0,223,163,124, 91,132,248, 39,182,105, 21,202,237, 50, 78,145,
|
||||
0,239,195, 44,155,116, 88,183, 43,196,232, 7,176, 95,115,156,
|
||||
0,255,227, 28,219, 36, 56,199,171, 84, 72,183,112,143,147,108,
|
||||
0,143, 3,140, 6,137, 5,138, 12,131, 15,128, 10,133, 9,134,
|
||||
0,159, 35,188, 70,217,101,250,140, 19,175, 48,202, 85,233,118,
|
||||
0,175, 67,236,134, 41,197,106, 17,190, 82,253,151, 56,212,123,
|
||||
0,191, 99,220,198,121,165, 26,145, 46,242, 77, 87,232, 52,139,
|
||||
0, 79,158,209, 33,110,191,240, 66, 13,220,147, 99, 44,253,178,
|
||||
0, 95,190,225, 97, 62,223,128,194,157,124, 35,163,252, 29, 66,
|
||||
0,111,222,177,161,206,127, 16, 95, 48,129,238,254,145, 32, 79,
|
||||
0,127,254,129,225,158, 31, 96,223,160, 33, 94, 62, 65,192,191,
|
||||
0, 15, 30, 17, 60, 51, 34, 45,120,119,102,105, 68, 75, 90, 85,
|
||||
0, 31, 62, 33,124, 99, 66, 93,248,231,198,217,132,155,186,165,
|
||||
0, 47, 94,113,188,147,226,205,101, 74, 59, 20,217,246,135,168,
|
||||
0, 63,126, 65,252,195,130,189,229,218,155,164, 25, 38,103, 88,
|
||||
0,156, 37,185, 74,214,111,243,148, 8,177, 45,222, 66,251,103,
|
||||
0,140, 5,137, 10,134, 15,131, 20,152, 17,157, 30,146, 27,151,
|
||||
0,188,101,217,202,118,175, 19,137, 53,236, 80, 67,255, 38,154,
|
||||
0,172, 69,233,138, 38,207, 99, 9,165, 76,224,131, 47,198,106,
|
||||
0,220,165,121, 87,139,242, 46,174,114, 11,215,249, 37, 92,128,
|
||||
0,204,133, 73, 23,219,146, 94, 46,226,171,103, 57,245,188,112,
|
||||
0,252,229, 25,215, 43, 50,206,179, 79, 86,170,100,152,129,125,
|
||||
0,236,197, 41,151,123, 82,190, 51,223,246, 26,164, 72, 97,141,
|
||||
0, 28, 56, 36,112,108, 72, 84,224,252,216,196,144,140,168,180,
|
||||
0, 12, 24, 20, 48, 60, 40, 36, 96,108,120,116, 80, 92, 72, 68,
|
||||
0, 60,120, 68,240,204,136,180,253,193,133,185, 13, 49,117, 73,
|
||||
0, 44, 88,116,176,156,232,196,125, 81, 37, 9,205,225,149,185,
|
||||
0, 92,184,228,109, 49,213,137,218,134, 98, 62,183,235, 15, 83,
|
||||
0, 76,152,212, 45, 97,181,249, 90, 22,194,142,119, 59,239,163,
|
||||
0,124,248,132,237,145, 21,105,199,187, 63, 67, 42, 86,210,174,
|
||||
0,108,216,180,173,193,117, 25, 71, 43,159,243,234,134, 50, 94,
|
||||
0,129, 31,158, 62,191, 33,160,124,253, 99,226, 66,195, 93,220,
|
||||
0,145, 63,174,126,239, 65,208,252,109,195, 82,130, 19,189, 44,
|
||||
0,161, 95,254,190, 31,225, 64, 97,192, 62,159,223,126,128, 33,
|
||||
0,177,127,206,254, 79,129, 48,225, 80,158, 47, 31,174, 96,209,
|
||||
0,193,159, 94, 35,226,188,125, 70,135,217, 24,101,164,250, 59,
|
||||
0,209,191,110, 99,178,220, 13,198, 23,121,168,165,116, 26,203,
|
||||
0,225,223, 62,163, 66,124,157, 91,186,132,101,248, 25, 39,198,
|
||||
0,241,255, 14,227, 18, 28,237,219, 42, 36,213, 56,201,199, 54,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
0, 17, 34, 51, 68, 85,102,119,136,153,170,187,204,221,238,255,
|
||||
0, 33, 66, 99,132,165,198,231, 21, 52, 87,118,145,176,211,242,
|
||||
0, 49, 98, 83,196,245,166,151,149,164,247,198, 81, 96, 51, 2,
|
||||
0, 65,130,195, 25, 88,155,218, 50,115,176,241, 43,106,169,232,
|
||||
0, 81,162,243, 89, 8,251,170,178,227, 16, 65,235,186, 73, 24,
|
||||
0, 97,194,163,153,248, 91, 58, 47, 78,237,140,182,215,116, 21,
|
||||
0,113,226,147,217,168, 59, 74,175,222, 77, 60,118, 7,148,229,
|
||||
0,166, 81,247,162, 4,243, 85, 89,255, 8,174,251, 93,170, 12,
|
||||
0,182,113,199,226, 84,147, 37,217,111,168, 30, 59,141, 74,252,
|
||||
0,134, 17,151, 34,164, 51,181, 68,194, 85,211,102,224,119,241,
|
||||
0,150, 49,167, 98,244, 83,197,196, 82,245, 99,166, 48,151, 1,
|
||||
0,230,209, 55,191, 89,110,136, 99,133,178, 84,220, 58, 13,235,
|
||||
0,246,241, 7,255, 9, 14,248,227, 21, 18,228, 28,234,237, 27,
|
||||
0,198,145, 87, 63,249,174,104,126,184,239, 41, 65,135,208, 22,
|
||||
0,214,177,103,127,169,206, 24,254, 40, 79,153,129, 87, 48,230,
|
||||
0, 38, 76,106,152,190,212,242, 45, 11, 97, 71,181,147,249,223,
|
||||
0, 54,108, 90,216,238,180,130,173,155,193,247,117, 67, 25, 47,
|
||||
0, 6, 12, 10, 24, 30, 20, 18, 48, 54, 60, 58, 40, 46, 36, 34,
|
||||
0, 22, 44, 58, 88, 78,116, 98,176,166,156,138,232,254,196,210,
|
||||
0,102,204,170,133,227, 73, 47, 23,113,219,189,146,244, 94, 56,
|
||||
0,118,236,154,197,179, 41, 95,151,225,123, 13, 82, 36,190,200,
|
||||
0, 70,140,202, 5, 67,137,207, 10, 76,134,192, 15, 73,131,197,
|
||||
0, 86,172,250, 69, 19,233,191,138,220, 38,112,207,153, 99, 53,
|
||||
0,187,107,208,214,109,189, 6,177, 10,218, 97,103,220, 12,183,
|
||||
0,171, 75,224,150, 61,221,118, 49,154,122,209,167, 12,236, 71,
|
||||
0,155, 43,176, 86,205,125,230,172, 55,135, 28,250, 97,209, 74,
|
||||
0,139, 11,128, 22,157, 29,150, 44,167, 39,172, 58,177, 49,186,
|
||||
0,251,235, 16,203, 48, 32,219,139,112, 96,155, 64,187,171, 80,
|
||||
0,235,203, 32,139, 96, 64,171, 11,224,192, 43,128,107, 75,160,
|
||||
0,219,171,112, 75,144,224, 59,150, 77, 61,230,221, 6,118,173,
|
||||
0,203,139, 64, 11,192,128, 75, 22,221,157, 86, 29,214,150, 93,
|
||||
0, 59,118, 77,236,215,154,161,197,254,179,136, 41, 18, 95,100,
|
||||
0, 43, 86,125,172,135,250,209, 69,110, 19, 56,233,194,191,148,
|
||||
0, 27, 54, 45,108,119, 90, 65,216,195,238,245,180,175,130,153,
|
||||
0, 11, 22, 29, 44, 39, 58, 49, 88, 83, 78, 69,116,127, 98,105,
|
||||
0,123,246,141,241,138, 7,124,255,132, 9,114, 14,117,248,131,
|
||||
0,107,214,189,177,218,103, 12,127, 20,169,194,206,165, 24,115,
|
||||
0, 91,182,237,113, 42,199,156,226,185, 84, 15,147,200, 37,126,
|
||||
0, 75,150,221, 49,122,167,236, 98, 41,244,191, 83, 24,197,142,
|
||||
};
|
||||
|
||||
/* clang-format on */
|
||||
#endif
|
||||
285
nanors/deps/obl/oblas_lite.c
Normal file
285
nanors/deps/obl/oblas_lite.c
Normal file
@@ -0,0 +1,285 @@
|
||||
// Note about the frequent use of #undef before most defines in this file: These exist to
|
||||
// prevent a lot of (harmless) redefined macro warnings if this file is re-included multiple
|
||||
// times in the same build context. Sunshine and Moonlight use this trick to bundle all arch
|
||||
// builds into the same binary along with runtime detection. An example can be found at
|
||||
// https://github.com/LizardByte/Sunshine/blob/master/src/rswrapper.c
|
||||
|
||||
#include "oblas_lite.h"
|
||||
|
||||
#if defined(OBLAS_TINY)
|
||||
static inline uint8_t gf2_8_mul(uint16_t a, uint16_t b)
|
||||
{
|
||||
if (!a || !b) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Perform 8-bit, carry-less multiplication of |a| and |b|.
|
||||
return GF2_8_EXP[GF2_8_LOG[a] + GF2_8_LOG[b]];
|
||||
}
|
||||
|
||||
static void obl_axpy_ref(u8 *a, u8 *b, u8 u, unsigned k)
|
||||
{
|
||||
register u8 *ap = a, *ae = &a[k], *bp = b;
|
||||
for (; ap != ae; ap++, bp++)
|
||||
*ap ^= gf2_8_mul(u, *bp);
|
||||
}
|
||||
|
||||
static void obl_scal_ref(u8 *a, u8 *b, u8 u, unsigned k)
|
||||
{
|
||||
(void)b;
|
||||
register u8 *ap = a, *ae = &a[k];
|
||||
for (; ap != ae; ap++)
|
||||
*ap = gf2_8_mul(u, *ap);
|
||||
}
|
||||
|
||||
#else
|
||||
#include "gf2_8_mul_table.h"
|
||||
|
||||
static void obl_axpy_ref(u8 *a, u8 *b, u8 u, unsigned k)
|
||||
{
|
||||
register const u8 *u_row = &GF2_8_MUL[u << 8];
|
||||
register u8 *ap = a, *ae = &a[k], *bp = b;
|
||||
for (; ap != ae; ap++, bp++)
|
||||
*ap ^= u_row[*bp];
|
||||
}
|
||||
|
||||
static void obl_scal_ref(u8 *a, u8 *b, u8 u, unsigned k)
|
||||
{
|
||||
(void)b;
|
||||
register const u8 *u_row = &GF2_8_MUL[u << 8];
|
||||
register u8 *ap = a, *ae = &a[k];
|
||||
for (; ap != ae; ap++)
|
||||
*ap = u_row[*ap];
|
||||
}
|
||||
#endif
|
||||
|
||||
void obl_axpyb32_ref(u8 *a, u32 *b, u8 u, unsigned k)
|
||||
{
|
||||
for (unsigned idx = 0, p = 0; idx < k; idx += 8 * sizeof(u32), p++) {
|
||||
u32 tmp = b[p];
|
||||
while (tmp > 0) {
|
||||
#ifdef _MSC_VER
|
||||
unsigned long index = 0;
|
||||
_BitScanForward(&index, tmp);
|
||||
unsigned tz = (unsigned int)index;
|
||||
#else
|
||||
unsigned tz = __builtin_ctz(tmp);
|
||||
#endif
|
||||
tmp = tmp & (tmp - 1);
|
||||
a[tz + idx] ^= u;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(OBLAS_AVX512)
|
||||
#include <immintrin.h>
|
||||
|
||||
#undef OBLAS_ALIGN
|
||||
#define OBLAS_ALIGN 64
|
||||
|
||||
#undef OBL_SHUF
|
||||
#define OBL_SHUF(op, a, b, f) \
|
||||
do { \
|
||||
const u8 *u_lo = GF2_8_SHUF_LO + u * 16; \
|
||||
const u8 *u_hi = GF2_8_SHUF_HI + u * 16; \
|
||||
const __m512i mask = _mm512_set1_epi8(0x0f); \
|
||||
const __m128i ulo_128 = _mm_loadu_si128((__m128i *)u_lo); \
|
||||
const __m128i uhi_128 = _mm_loadu_si128((__m128i *)u_hi); \
|
||||
const __m512i urow_lo = _mm512_broadcast_i32x4(ulo_128); \
|
||||
const __m512i urow_hi = _mm512_broadcast_i32x4(uhi_128); \
|
||||
__m512i *ap = (__m512i *)a, *ae = (__m512i *)(a + k - (k % sizeof(__m512i))), *bp = (__m512i *)b; \
|
||||
for (; ap < ae; ap++, bp++) { \
|
||||
__m512i bx = _mm512_loadu_si512(bp); \
|
||||
__m512i lo = _mm512_and_si512(bx, mask); \
|
||||
bx = _mm512_srli_epi64(bx, 4); \
|
||||
__m512i hi = _mm512_and_si512(bx, mask); \
|
||||
lo = _mm512_shuffle_epi8(urow_lo, lo); \
|
||||
hi = _mm512_shuffle_epi8(urow_hi, hi); \
|
||||
_mm512_storeu_si512(ap, f(_mm512_loadu_si512(ap), _mm512_xor_si512(lo, hi))); \
|
||||
} \
|
||||
op##_ref((u8 *)ap, (u8 *)bp, u, k % sizeof(__m512i)); \
|
||||
} while (0)
|
||||
|
||||
#undef OBL_SHUF_XOR
|
||||
#define OBL_SHUF_XOR _mm512_xor_si512
|
||||
|
||||
#undef OBL_AXPYB32
|
||||
#define OBL_AXPYB32(a, b, u, k) \
|
||||
do { \
|
||||
__m512i *ap = (__m512i *)a, *ae = (__m512i *)(a + k); \
|
||||
__m512i scatter = \
|
||||
_mm512_set_epi32(0x03030303, 0x03030303, 0x02020202, 0x02020202, 0x01010101, 0x01010101, 0x00000000, 0x00000000, \
|
||||
0x03030303, 0x03030303, 0x02020202, 0x02020202, 0x01010101, 0x01010101, 0x00000000, 0x00000000); \
|
||||
__m512i cmpmask = \
|
||||
_mm512_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, \
|
||||
0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201); \
|
||||
__m512i up = _mm512_set1_epi8(u); \
|
||||
for (unsigned p = 0; ap < ae; p++, ap++) { \
|
||||
__m512i bcast = _mm512_set1_epi32(b[p]); \
|
||||
__m512i ret = _mm512_shuffle_epi8(bcast, scatter); \
|
||||
ret = _mm512_andnot_si512(ret, cmpmask); \
|
||||
__mmask64 tmp = _mm512_cmpeq_epi8_mask(ret, _mm512_setzero_si512()); \
|
||||
ret = _mm512_mask_blend_epi8(tmp, _mm512_setzero_si512(), up); \
|
||||
_mm512_storeu_si512(ap, _mm512_xor_si512(_mm512_loadu_si512(ap), ret)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
#if defined(OBLAS_AVX2)
|
||||
#include <immintrin.h>
|
||||
|
||||
#undef OBLAS_ALIGN
|
||||
#define OBLAS_ALIGN 32
|
||||
|
||||
#undef OBL_SHUF
|
||||
#define OBL_SHUF(op, a, b, f) \
|
||||
do { \
|
||||
const u8 *u_lo = GF2_8_SHUF_LO + u * 16; \
|
||||
const u8 *u_hi = GF2_8_SHUF_HI + u * 16; \
|
||||
const __m256i mask = _mm256_set1_epi8(0x0f); \
|
||||
const __m256i urow_lo = _mm256_loadu2_m128i((__m128i *)u_lo, (__m128i *)u_lo); \
|
||||
const __m256i urow_hi = _mm256_loadu2_m128i((__m128i *)u_hi, (__m128i *)u_hi); \
|
||||
__m256i *ap = (__m256i *)a, *ae = (__m256i *)(a + k - (k % sizeof(__m256i))), *bp = (__m256i *)b; \
|
||||
for (; ap < ae; ap++, bp++) { \
|
||||
__m256i bx = _mm256_loadu_si256(bp); \
|
||||
__m256i lo = _mm256_and_si256(bx, mask); \
|
||||
bx = _mm256_srli_epi64(bx, 4); \
|
||||
__m256i hi = _mm256_and_si256(bx, mask); \
|
||||
lo = _mm256_shuffle_epi8(urow_lo, lo); \
|
||||
hi = _mm256_shuffle_epi8(urow_hi, hi); \
|
||||
_mm256_storeu_si256(ap, f(_mm256_loadu_si256(ap), _mm256_xor_si256(lo, hi))); \
|
||||
} \
|
||||
op##_ref((u8 *)ap, (u8 *)bp, u, k % sizeof(__m256i)); \
|
||||
} while (0)
|
||||
|
||||
#undef OBL_SHUF_XOR
|
||||
#define OBL_SHUF_XOR _mm256_xor_si256
|
||||
|
||||
#undef OBL_AXPYB32
|
||||
#define OBL_AXPYB32(a, b, u, k) \
|
||||
do { \
|
||||
__m256i *ap = (__m256i *)a, *ae = (__m256i *)(a + k); \
|
||||
__m256i scatter = \
|
||||
_mm256_set_epi32(0x03030303, 0x03030303, 0x02020202, 0x02020202, 0x01010101, 0x01010101, 0x00000000, 0x00000000); \
|
||||
__m256i cmpmask = \
|
||||
_mm256_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201, 0x80402010, 0x08040201); \
|
||||
__m256i up = _mm256_set1_epi8(u); \
|
||||
for (unsigned p = 0; ap < ae; p++, ap++) { \
|
||||
__m256i bcast = _mm256_set1_epi32(b[p]); \
|
||||
__m256i ret = _mm256_shuffle_epi8(bcast, scatter); \
|
||||
ret = _mm256_andnot_si256(ret, cmpmask); \
|
||||
ret = _mm256_and_si256(_mm256_cmpeq_epi8(ret, _mm256_setzero_si256()), up); \
|
||||
_mm256_storeu_si256(ap, _mm256_xor_si256(_mm256_loadu_si256(ap), ret)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
#if defined(OBLAS_SSE3) || defined(OBLAS_NEON)
|
||||
#if defined(OBLAS_NEON)
|
||||
#include "sse2neon/sse2neon.h"
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#undef OBLAS_ALIGN
|
||||
#define OBLAS_ALIGN 16
|
||||
|
||||
#undef OBL_SHUF
|
||||
#define OBL_SHUF(op, a, b, f) \
|
||||
do { \
|
||||
const u8 *u_lo = GF2_8_SHUF_LO + u * 16; \
|
||||
const u8 *u_hi = GF2_8_SHUF_HI + u * 16; \
|
||||
const __m128i mask = _mm_set1_epi8(0x0f); \
|
||||
const __m128i urow_lo = _mm_loadu_si128((__m128i *)u_lo); \
|
||||
const __m128i urow_hi = _mm_loadu_si128((__m128i *)u_hi); \
|
||||
__m128i *ap = (__m128i *)a, *ae = (__m128i *)(a + k - (k % sizeof(__m128i))), *bp = (__m128i *)b; \
|
||||
for (; ap < ae; ap++, bp++) { \
|
||||
__m128i bx = _mm_loadu_si128(bp); \
|
||||
__m128i lo = _mm_and_si128(bx, mask); \
|
||||
bx = _mm_srli_epi64(bx, 4); \
|
||||
__m128i hi = _mm_and_si128(bx, mask); \
|
||||
lo = _mm_shuffle_epi8(urow_lo, lo); \
|
||||
hi = _mm_shuffle_epi8(urow_hi, hi); \
|
||||
_mm_storeu_si128(ap, f(_mm_loadu_si128(ap), _mm_xor_si128(lo, hi))); \
|
||||
} \
|
||||
op##_ref((u8 *)ap, (u8 *)bp, u, k % sizeof(__m128i)); \
|
||||
} while (0)
|
||||
|
||||
#undef OBL_SHUF_XOR
|
||||
#define OBL_SHUF_XOR _mm_xor_si128
|
||||
|
||||
#undef OBL_AXPYB32
|
||||
#define OBL_AXPYB32(a, b, u, k) \
|
||||
do { \
|
||||
__m128i *ap = (__m128i *)a, *ae = (__m128i *)(a + k); \
|
||||
__m128i scatter_hi = _mm_set_epi32(0x03030303, 0x03030303, 0x02020202, 0x02020202); \
|
||||
__m128i scatter_lo = _mm_set_epi32(0x01010101, 0x01010101, 0x00000000, 0x00000000); \
|
||||
__m128i cmpmask = _mm_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201); \
|
||||
__m128i up = _mm_set1_epi8(u); \
|
||||
for (unsigned p = 0; ap < ae; p++, ap++) { \
|
||||
__m128i bcast = _mm_set1_epi32(b[p]); \
|
||||
__m128i ret_lo = _mm_shuffle_epi8(bcast, scatter_lo); \
|
||||
__m128i ret_hi = _mm_shuffle_epi8(bcast, scatter_hi); \
|
||||
ret_lo = _mm_andnot_si128(ret_lo, cmpmask); \
|
||||
ret_hi = _mm_andnot_si128(ret_hi, cmpmask); \
|
||||
ret_lo = _mm_and_si128(_mm_cmpeq_epi8(ret_lo, _mm_setzero_si128()), up); \
|
||||
ret_hi = _mm_and_si128(_mm_cmpeq_epi8(ret_hi, _mm_setzero_si128()), up); \
|
||||
_mm_storeu_si128(ap, _mm_xor_si128(_mm_loadu_si128(ap), ret_lo)); \
|
||||
ap++; \
|
||||
_mm_storeu_si128(ap, _mm_xor_si128(_mm_loadu_si128(ap), ret_hi)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#undef OBLAS_ALIGN
|
||||
#define OBLAS_ALIGN (sizeof(void *))
|
||||
|
||||
#undef OBL_SHUF
|
||||
#define OBL_SHUF(op, a, b, f) \
|
||||
do { \
|
||||
op##_ref(a, b, u, k); \
|
||||
} while (0)
|
||||
|
||||
#undef OBL_SHUF_XOR
|
||||
#define OBL_SHUF_XOR
|
||||
|
||||
#undef OBL_AXPYB32
|
||||
#define OBL_AXPYB32 obl_axpyb32_ref
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define OBL_NOOP(a, b) (b)
|
||||
void obl_axpy(u8 *a, u8 *b, u8 u, unsigned k)
|
||||
{
|
||||
if (u == 1) {
|
||||
register u8 *ap = a, *ae = &a[k], *bp = b;
|
||||
for (; ap < ae; ap++, bp++)
|
||||
*ap ^= *bp;
|
||||
} else {
|
||||
OBL_SHUF(obl_axpy, a, b, OBL_SHUF_XOR);
|
||||
}
|
||||
}
|
||||
|
||||
void obl_scal(u8 *a, u8 u, unsigned k)
|
||||
{
|
||||
OBL_SHUF(obl_scal, a, a, OBL_NOOP);
|
||||
}
|
||||
|
||||
void obl_swap(u8 *a, u8 *b, unsigned k)
|
||||
{
|
||||
register u8 *ap = a, *ae = &a[k], *bp = b;
|
||||
for (; ap < ae; ap++, bp++) {
|
||||
u8 tmp = *ap;
|
||||
*ap = *bp;
|
||||
*bp = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
void obl_axpyb32(u8 *a, u32 *b, u8 u, unsigned k)
|
||||
{
|
||||
OBL_AXPYB32(a, b, u, k);
|
||||
}
|
||||
11
nanors/deps/obl/oblas_lite.h
Normal file
11
nanors/deps/obl/oblas_lite.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include "gf2_8_tables.h"
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint32_t u32;
|
||||
|
||||
void obl_axpy(u8 *a, u8 *b, u8 u, unsigned k);
|
||||
void obl_scal(u8 *a, u8 u, unsigned k);
|
||||
void obl_swap(u8 *a, u8 *b, unsigned k);
|
||||
void obl_axpyb32(u8 *a, u32 *b, u8 u, unsigned k);
|
||||
Reference in New Issue
Block a user