/* * fec.c -- forward error correction based on Vandermonde matrices * * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it) * (C) 2001 Alain Knaff (alain@knaff.lu) * (C) 2017 Iwan Timmer (irtimmer@gmail.com) * * Portions derived from code by Phil Karn (karn@ka9q.ampr.org), * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */ #include #include #include #include #include "rs.h" #ifdef _MSC_VER #define NEED_ALLOCA #define alloca(x) _alloca(x) #endif typedef unsigned char gf; #define GF_BITS 8 #define GF_PP "101110001" #define GF_SIZE ((1 << GF_BITS) - 1) #define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;} /* * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying * many numbers by the same constant. In this case the first * call sets the constant, and others perform the multiplications. * A value related to the multiplication is held in a local variable * declared with USE_GF_MULC . See usage in addmul1(). */ #define USE_GF_MULC register gf * __gf_mulc_ #define GF_MULC0(c) __gf_mulc_ = &gf_mul_table[(c)<<8] #define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x] #define GF_MULC(dst, x) dst = __gf_mulc_[x] #define gf_mul(x,y) gf_mul_table[(x<<8)+y] /* * To speed up computations, we have tables for logarithm, exponent * multiplication and inverse of a number. */ static gf gf_exp[2*GF_SIZE]; static int gf_log[GF_SIZE + 1]; static gf inverse[GF_SIZE+1]; #ifdef _MSC_VER static gf __declspec(align (256)) gf_mul_table[(GF_SIZE + 1)*(GF_SIZE + 1)]; #else static gf gf_mul_table[(GF_SIZE + 1)*(GF_SIZE + 1)] __attribute__((aligned (256))); #endif /* * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1, * without a slow divide. */ static inline gf modnn(int x) { while (x >= GF_SIZE) { x -= GF_SIZE; x = (x >> GF_BITS) + (x & GF_SIZE); } return x; } static void addmul(gf *dst1, gf *src1, gf c, int sz) { USE_GF_MULC; if (c != 0) { register gf *dst = dst1, *src = src1; gf *lim = &dst[sz]; GF_MULC0(c); for (; dst < lim; dst++, src++) GF_ADDMULC(*dst, *src); } } static void mul(gf *dst1, gf *src1, gf c, int sz) { USE_GF_MULC; if (c != 0) { register gf *dst = dst1, *src = src1; gf *lim = &dst[sz]; GF_MULC0(c); for (; dst < lim; dst++, src++) GF_MULC(*dst , *src); } else memset(dst1, 0, c); } /* y = a.dot(b) */ static gf* multiply1(gf *a, int ar, int ac, gf *b, int br, int bc) { gf *new_m, tg; int r, c, i, ptr = 0; assert(ac == br); new_m = (gf*) calloc(1, ar*bc); if (NULL != new_m) { /* this multiply is slow */ for (r = 0; r < ar; r++) { for (c = 0; c < bc; c++) { tg = 0; for (i = 0; i < ac; i++) tg ^= gf_mul(a[r*ac+i], b[i*bc+c]); new_m[ptr++] = tg; } } } return new_m; } static void init_mul_table(void) { int i, j; for (i=0; i< GF_SIZE+1; i++) for (j=0; j< GF_SIZE+1; j++) gf_mul_table[(i<<8)+j] = gf_exp[modnn(gf_log[i] + gf_log[j]) ] ; for (j=0; j< GF_SIZE+1; j++) gf_mul_table[j] = gf_mul_table[j<<8] = 0; } /* * initialize the data structures used for computations in GF. */ static void generate_gf(void) { int i; gf mask; mask = 1; gf_exp[GF_BITS] = 0; /* * first, generate the (polynomial representation of) powers of \alpha, * which are stored in gf_exp[i] = \alpha ** i . * At the same time build gf_log[gf_exp[i]] = i . * The first GF_BITS powers are simply bits shifted to the left. */ for (i = 0; i < GF_BITS; i++, mask <<= 1) { gf_exp[i] = mask; gf_log[gf_exp[i]] = i; /* * If GF_PP[i] == 1 then \alpha ** i occurs in poly-repr * gf_exp[GF_BITS] = \alpha ** GF_BITS */ if (GF_PP[i] == '1') gf_exp[GF_BITS] ^= mask; } /* * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can als * compute its inverse. */ gf_log[gf_exp[GF_BITS]] = GF_BITS; /* * Poly-repr of \alpha ** (i+1) is given by poly-repr of * \alpha ** i shifted left one-bit and accounting for any * \alpha ** GF_BITS term that may occur when poly-repr of * \alpha ** i is shifted. */ mask = 1 << (GF_BITS - 1) ; for (i = GF_BITS + 1; i < GF_SIZE; i++) { if (gf_exp[i - 1] >= mask) gf_exp[i] = gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1); else gf_exp[i] = gf_exp[i - 1] << 1; gf_log[gf_exp[i]] = i; } /* * log(0) is not defined, so use a special value */ gf_log[0] = GF_SIZE; /* set the extended gf_exp values for fast multiply */ for (i = 0; i < GF_SIZE; i++) gf_exp[i + GF_SIZE] = gf_exp[i]; /* * again special cases. 0 has no inverse. This used to * be initialized to GF_SIZE, but it should make no difference * since noone is supposed to read from here. */ inverse[0] = 0; inverse[1] = 1; for (i=2; i<=GF_SIZE; i++) inverse[i] = gf_exp[GF_SIZE-gf_log[i]]; } /* * invert_mat() takes a matrix and produces its inverse * k is the size of the matrix. * (Gauss-Jordan, adapted from Numerical Recipes in C) * Return non-zero if singular. */ static int invert_mat(gf *src, int k) { gf c, *p; int irow, icol, row, col, i, ix; int error = 1; #ifdef NEED_ALLOCA int *indxc = alloca(k*sizeof(int)); int *indxr = alloca(k*sizeof(int)); int *ipiv = alloca(k*sizeof(int)); gf *id_row = alloca(k*sizeof(gf)); #else int indxc[k]; int indxr[k]; int ipiv[k]; gf id_row[k]; #endif memset(id_row, 0, k*sizeof(gf)); /* * ipiv marks elements already used as pivots. */ for (i = 0; i < k; i++) ipiv[i] = 0; for (col = 0; col < k; col++) { gf *pivot_row; /* * Zeroing column 'col', look for a non-zero element. * First try on the diagonal, if it fails, look elsewhere. */ irow = icol = -1; if (ipiv[col] != 1 && src[col*k + col] != 0) { irow = col; icol = col; goto found_piv; } for (row = 0; row < k; row++) { if (ipiv[row] != 1) { for (ix = 0; ix < k; ix++) { if (ipiv[ix] == 0) { if (src[row*k + ix] != 0) { irow = row; icol = ix; goto found_piv; } } else if (ipiv[ix] > 1) { fprintf(stderr, "singular matrix\n"); goto fail; } } } } if (icol == -1) { fprintf(stderr, "XXX pivot not found!\n"); goto fail ; } found_piv: ++(ipiv[icol]); /* * swap rows irow and icol, so afterwards the diagonal * element will be correct. Rarely done, not worth * optimizing. */ if (irow != icol) { for (ix = 0; ix < k; ix++) { SWAP(src[irow*k + ix], src[icol*k + ix], gf); } } indxr[col] = irow; indxc[col] = icol; pivot_row = &src[icol*k]; c = pivot_row[icol]; if (c == 0) { fprintf(stderr, "singular matrix 2\n"); goto fail; } else if (c != 1 ) { /* * this is done often , but optimizing is not so * fruitful, at least in the obvious ways (unrolling) */ c = inverse[ c ]; pivot_row[icol] = 1; for (ix = 0; ix < k; ix++) pivot_row[ix] = gf_mul(c, pivot_row[ix]); } /* * from all rows, remove multiples of the selected row * to zero the relevant entry (in fact, the entry is not zero * because we know it must be zero). * (Here, if we know that the pivot_row is the identity, * we can optimize the addmul). */ id_row[icol] = 1; if (memcmp(pivot_row, id_row, k*sizeof(gf)) != 0) { for (p = src, ix = 0 ; ix < k ; ix++, p += k) { if (ix != icol) { c = p[icol]; p[icol] = 0; addmul(p, pivot_row, c, k); } } } id_row[icol] = 0; } for (col = k-1 ; col >= 0 ; col-- ) { if (indxr[col] <0 || indxr[col] >= k) fprintf(stderr, "AARGH, indxr[col] %d\n", indxr[col]); else if (indxc[col] <0 || indxc[col] >= k) fprintf(stderr, "AARGH, indxc[col] %d\n", indxc[col]); else if (indxr[col] != indxc[col] ) { for (row = 0 ; row < k ; row++ ) SWAP( src[row*k + indxr[col]], src[row*k + indxc[col]], gf); } } error = 0; fail: return error ; } /* * Not check for input params * */ static gf* sub_matrix(gf* matrix, int rmin, int cmin, int rmax, int cmax, int nrows, int ncols) { int i, j, ptr = 0; gf* new_m = (gf*) malloc((rmax-rmin) * (cmax-cmin)); if (NULL != new_m) { for (i = rmin; i < rmax; i++) { for (j = cmin; j < cmax; j++) { new_m[ptr++] = matrix[i*ncols + j]; } } } return new_m; } /* copy from golang rs version */ static inline int code_some_shards(gf* matrixRows, gf** inputs, gf** outputs, int dataShards, int outputCount, int byteCount) { gf* in; int iRow, c; for (c = 0; c < dataShards; c++) { in = inputs[c]; for (iRow = 0; iRow < outputCount; iRow++) { if (0 == c) mul(outputs[iRow], in, matrixRows[iRow*dataShards+c], byteCount); else addmul(outputs[iRow], in, matrixRows[iRow*dataShards+c], byteCount); } } return 0; } void reed_solomon_init(void) { generate_gf(); init_mul_table(); } reed_solomon* reed_solomon_new(int data_shards, int parity_shards) { gf* vm = NULL; gf* top = NULL; int err = 0; reed_solomon* rs = NULL; do { rs = malloc(sizeof(reed_solomon)); if (NULL == rs) return NULL; rs->data_shards = data_shards; rs->parity_shards = parity_shards; rs->shards = (data_shards + parity_shards); rs->m = NULL; rs->parity = NULL; if (rs->shards > DATA_SHARDS_MAX || data_shards <= 0 || parity_shards <= 0) { err = 1; break; } vm = (gf*)malloc(data_shards * rs->shards); if (NULL == vm) { err = 2; break; } int ptr = 0; for (int row = 0; row < rs->shards; row++) { for (int col = 0; col < data_shards; col++) vm[ptr++] = row == col ? 1 : 0; } top = sub_matrix(vm, 0, 0, data_shards, data_shards, rs->shards, data_shards); if (NULL == top) { err = 3; break; } err = invert_mat(top, data_shards); assert(0 == err); rs->m = multiply1(vm, rs->shards, data_shards, top, data_shards, data_shards); if (NULL == rs->m) { err = 4; break; } for (int j = 0; j < parity_shards; j++) { for (int i = 0; i < data_shards; i++) rs->m[(data_shards + j)*data_shards + i] = inverse[(parity_shards + i) ^ j]; } rs->parity = sub_matrix(rs->m, data_shards, 0, rs->shards, data_shards, rs->shards, data_shards); if (NULL == rs->parity) { err = 5; break; } free(vm); free(top); vm = NULL; top = NULL; return rs; } while(0); fprintf(stderr, "err=%d\n", err); if (NULL != vm) free(vm); if (NULL != top) free(top); if (NULL != rs) { if (NULL != rs->m) free(rs->m); if (NULL != rs->parity) free(rs->parity); free(rs); } return NULL; } void reed_solomon_release(reed_solomon* rs) { if (NULL != rs) { if (NULL != rs->m) free(rs->m); if (NULL != rs->parity) free(rs->parity); free(rs); } } /** * decode one shard * input: * rs * original data_blocks[rs->data_shards][block_size] * dec_fec_blocks[nr_fec_blocks][block_size] * fec_block_nos: fec pos number in original fec_blocks * erased_blocks: erased blocks in original data_blocks * nr_fec_blocks: the number of erased blocks * */ static int reed_solomon_decode(reed_solomon* rs, unsigned char **data_blocks, int block_size, unsigned char **dec_fec_blocks, unsigned int *fec_block_nos, unsigned int *erased_blocks, int nr_fec_blocks) { /* use stack instead of malloc, define a small number of DATA_SHARDS_MAX to save memory */ gf dataDecodeMatrix[DATA_SHARDS_MAX*DATA_SHARDS_MAX]; unsigned char* subShards[DATA_SHARDS_MAX]; unsigned char* outputs[DATA_SHARDS_MAX]; gf* m = rs->m; int i, j, c, swap, subMatrixRow, dataShards; /* the erased_blocks should always sorted * if sorted, nr_fec_blocks times to check it * if not, sort it here * */ for (i = 0; i < nr_fec_blocks; i++) { swap = 0; for (j = i+1; j < nr_fec_blocks; j++) { if (erased_blocks[i] > erased_blocks[j]) { /* the prefix is bigger than the following, swap */ c = erased_blocks[i]; erased_blocks[i] = erased_blocks[j]; erased_blocks[j] = c; swap = 1; } } if (!swap) break; } j = 0; subMatrixRow = 0; dataShards = rs->data_shards; for (i = 0; i < dataShards; i++) { if (j < nr_fec_blocks && i == (int)erased_blocks[j]) j++; else { /* this row is ok */ for (c = 0; c < dataShards; c++) dataDecodeMatrix[subMatrixRow*dataShards + c] = m[i*dataShards + c]; subShards[subMatrixRow] = data_blocks[i]; subMatrixRow++; } } for (i = 0; i < nr_fec_blocks && subMatrixRow < dataShards; i++) { subShards[subMatrixRow] = dec_fec_blocks[i]; j = dataShards + fec_block_nos[i]; for (c = 0; c < dataShards; c++) dataDecodeMatrix[subMatrixRow*dataShards + c] = m[j*dataShards + c]; subMatrixRow++; } if (subMatrixRow < dataShards) return -1; invert_mat(dataDecodeMatrix, dataShards); for (i = 0; i < nr_fec_blocks; i++) { j = erased_blocks[i]; outputs[i] = data_blocks[j]; memmove(dataDecodeMatrix+i*dataShards, dataDecodeMatrix+j*dataShards, dataShards); } return code_some_shards(dataDecodeMatrix, subShards, outputs, dataShards, nr_fec_blocks, block_size); } /** * encode a big size of buffer * input: * rs * nr_shards: assert(0 == nr_shards % rs->shards) * shards[nr_shards][block_size] * */ int reed_solomon_encode(reed_solomon* rs, unsigned char** shards, int nr_shards, int block_size) { unsigned char** data_blocks; unsigned char** fec_blocks; int i, ds = rs->data_shards, ps = rs->parity_shards, ss = rs->shards; i = nr_shards / ss; data_blocks = shards; fec_blocks = &shards[(i*ds)]; for (i = 0; i < nr_shards; i += ss) { code_some_shards(rs->parity, data_blocks, fec_blocks, rs->data_shards, rs->parity_shards, block_size); data_blocks += ds; fec_blocks += ps; } return 0; } /** * reconstruct a big size of buffer * input: * rs * nr_shards: assert(0 == nr_shards % rs->data_shards) * shards[nr_shards][block_size] * marks[nr_shards] marks as errors * */ int reed_solomon_reconstruct(reed_solomon* rs, unsigned char** shards, unsigned char* marks, int nr_shards, int block_size) { unsigned char *dec_fec_blocks[DATA_SHARDS_MAX]; unsigned int fec_block_nos[DATA_SHARDS_MAX]; unsigned int erased_blocks[DATA_SHARDS_MAX]; unsigned char* fec_marks; unsigned char **data_blocks, **fec_blocks; int i, j, dn, pn, n; int ds = rs->data_shards; int ps = rs->parity_shards; int err = 0; data_blocks = shards; n = nr_shards / rs->shards; fec_marks = marks + n*ds; //after all data, is't fec marks fec_blocks = shards + n*ds; for (j = 0; j < n; j++) { dn = 0; for (i = 0; i < ds; i++) { if (marks[i]) erased_blocks[dn++] = i; } if (dn > 0) { pn = 0; for (i = 0; i < ps && pn < dn; i++) { if (!fec_marks[i]) { //got valid fec row fec_block_nos[pn] = i; dec_fec_blocks[pn] = fec_blocks[i]; pn++; } } if (dn == pn) { reed_solomon_decode(rs, data_blocks, block_size, dec_fec_blocks, fec_block_nos, erased_blocks, dn); } else err = -1; } data_blocks += ds; marks += ds; fec_blocks += ps; fec_marks += ps; } return err; }