diff --git a/src/core.h b/src/core.h index 21d39a4d..61340f3f 100644 --- a/src/core.h +++ b/src/core.h @@ -42,6 +42,10 @@ template struct DoubleSize { }; template <> +struct DoubleSize { + typedef uint16_t T; +}; +template <> struct DoubleSize { typedef uint32_t T; }; @@ -62,6 +66,10 @@ template struct SignedDoubleSize { }; template <> +struct SignedDoubleSize { + typedef int16_t T; +}; +template <> struct SignedDoubleSize { typedef int32_t T; }; diff --git a/src/fec_base.h b/src/fec_base.h index 58c18a36..2802c007 100644 --- a/src/fec_base.h +++ b/src/fec_base.h @@ -1349,7 +1349,7 @@ void FecCode::decode( if (type == FecType::SYSTEMATIC) { this->fft->fft(*dec_inter_codeword, output); for (unsigned i = 0; i < this->n_data; i++) { - output.copy(i, dec_inter_codeword->get(i)); + output.copy(*dec_inter_codeword, i, i); } } } diff --git a/src/fft_2n.h b/src/fft_2n.h index fec3fb19..1862362d 100644 --- a/src/fft_2n.h +++ b/src/fft_2n.h @@ -152,7 +152,7 @@ class Radix2 : public FourierTransform { size_t simd_trailing_len; size_t simd_offset; - std::unique_ptr rev = nullptr; + std::vector rev; std::unique_ptr> W = nullptr; std::unique_ptr> inv_W = nullptr; T* vec_W; @@ -192,7 +192,7 @@ Radix2::Radix2(const gf::Field& gf, int n, int data_len, size_t pkt_size) card = this->gf->card(); card_minus_one = this->gf->card_minus_one(); - rev = std::unique_ptr(new T[n]); + rev.reserve(n); init_bitrev(); // Indices used for accelerated functions @@ -361,21 +361,7 @@ void Radix2::fft(vec::Buffers& output, vec::Buffers& input) const unsigned group_len = (input_len > data_len) ? len / input_len : len / data_len; - const std::vector& i_mem = input.get_mem(); - const std::vector& o_mem = output.get_mem(); - - for (unsigned idx = 0; idx < input_len; ++idx) { - // set output = scramble(input), i.e. 
bit reversal ordering - for (unsigned i = rev[idx]; i < rev[idx] + group_len; ++i) { - memcpy(o_mem[i], i_mem[idx], buf_size); - } - } - for (unsigned idx = input_len; idx < data_len; ++idx) { - // set output = scramble(input), i.e. bit reversal ordering - for (unsigned i = rev[idx]; i < rev[idx] + group_len; ++i) { - memset(o_mem[i], 0, buf_size); - } - } + output.radix2_fft_prepare(input, rev, data_len, group_len); // ---------------------- // Two layers at a time @@ -513,38 +499,29 @@ void Radix2::fft_inv(vec::Buffers& output, vec::Buffers& input) bit_rev_permute(output); // copy input to output - const std::vector& i_mem = input.get_mem(); - const std::vector& o_mem = output.get_mem(); - unsigned i; - for (i = 0; i < input_len; ++i) { - memcpy(o_mem[i], i_mem[i], buf_size); - } + output.radix2_fft_inv_prepare(input); unsigned m = len / 2; + unsigned doubled_m = len; if (input_len < len) { - const unsigned input_len_power_2 = arith::ceil2(input_len); - for (; i < input_len_power_2; ++i) { - memset(o_mem[i], 0, buf_size); - } - // For Q are zeros only => Q = c * P for (; m >= input_len; m /= 2) { - unsigned doubled_m = 2 * m; for (unsigned j = 0; j < m; ++j) { const T r = inv_W->get(j * len / doubled_m); butterfly_gs_step_simple(output, r, j, m, doubled_m); } + doubled_m = m; } } // Next, normal butterlfy GS is performed for (; m >= 1; m /= 2) { - unsigned doubled_m = 2 * m; for (unsigned j = 0; j < m; ++j) { const T r = inv_W->get(j * len / doubled_m); butterfly_gs_step(output, r, j, m, doubled_m); } + doubled_m = m; } // 2nd reversion of elements of output to return its natural order diff --git a/src/fft_single.h b/src/fft_single.h index cd3cf6bc..badc2f0b 100644 --- a/src/fft_single.h +++ b/src/fft_single.h @@ -89,9 +89,9 @@ void Single::ifft(vec::Vector& output, vec::Vector& input) template void Single::fft(vec::Buffers& output, vec::Buffers& input) { - T* buf = input.get(0); - for (int i = 0; i < this->n; i++) - output.copy(i, buf); + for (int i = 0; i < 
/** Build a mask whose `len` least-significant bits are ones.
 *
 * The result saturates to "all ones" when `len` is at least the bit width of
 * `T`; shifting by the full width would otherwise be undefined behaviour.
 *
 * @param len - number of low bits to set
 * @return the mask, as a value of type `T`
 */
template <typename T>
inline T mask(size_t len)
{
    const size_t width = sizeof(T) * CHAR_BIT;
    if (len >= width) {
        return static_cast<T>(~static_cast<T>(0));
    }
    return static_cast<T>((static_cast<T>(1) << len) - 1);
}

/** Overwrite a range of bits of `dest` with the low bits of `src`.
 *
 * Bits `[d_begin, d_begin + bits_nb)` of `dest` are replaced by the `bits_nb`
 * low bits of `src`; every other bit of `dest` is left untouched.
 *
 * @param src - value providing the bits
 * @param dest - byte to modify
 * @param bits_nb - number of bits to write (at most `CHAR_BIT` are used)
 * @param d_begin - position of the first written bit inside `dest`
 */
template <typename T>
inline void set_bits(T src, uint8_t& dest, size_t bits_nb, size_t d_begin = 0)
{
    const uint8_t m = mask<uint8_t>(bits_nb);
    // bits of `src` that must land in `dest`
    const uint8_t payload = static_cast<uint8_t>(src & m);
    // clear the target range, then drop the payload in place
    dest = static_cast<uint8_t>(dest & ~(m << d_begin));
    dest = static_cast<uint8_t>(dest | (payload << d_begin));
}
A pair of data and meta elements + *can represent an integer of `8*s + s` bits. * - * A Buffers contains pointers to `n` buffers such as + * Each data element points to a buffer of size `m * sizeof(T)` bytes. + * Hence, each meta element points to a buffer of size + * `bsize = m * sizeof(T) / 8` bytes that should be an integer, i.e. + * m * sizeof(T) % 8 = 0. This condition is not difficult to achieve, as a + * convenient `size` can always be chosen with a negligible impact on its + * application. * * Example: * - * We have a set of `N` independent buffers: + * We have two sets, each of `N` independent buffers: + * + * data1 | data2 | … | dataN + * -------- | -------- | - | ------- + * data1[0] | data2[0] | … | dataN[0] + * data1[1] | data2[1] | … | dataN[1] + * … | … | … | … * - * buf1 | buf2 | … | bufN - * ------- | ------- | - | ------- - * buf1[0] | buf2[0] | … | bufN[0] - * buf1[1] | buf2[1] | … | bufN[1] - * … | … | … | … + * + * meta1 | meta2 | … | metaN + * -------- | -------- | - | ------- + * meta1[0] | meta2[0] | … | metaN[0] + * meta1[1] | meta2[1] | … | metaN[1] + * … | … | … | … * * In memory, the Buffers looks like: * - * v[0] | v[1] | … | v[n-1] - * ----- | ------ | - | -------- - * &buf1 | &buf2 | … | &bufN + * u[0] | u[1] | … | u[n-1] + * ------ | ------- | - | -------- + * &data1 | &data2 | … | &dataN + * + * v[0] | v[1] | … | v[n-1] + * ------ | ------- | - | -------- + * &meta1 | &meta2 | … | &metaN * * A Buffers can: * - owns all the memory (the vector of buffers and the buffers themselve). 
@@ -100,41 +138,149 @@ enum class BufMemAlloc { template class Buffers final { public: - Buffers(int n, size_t size); - Buffers(int n, size_t size, const std::vector& mem); + Buffers(int n, size_t size, bool has_meta = false); + Buffers( + int n, + size_t size, + const std::vector& mem, + const std::vector* meta = nullptr); Buffers(const Buffers& vec, int n = 0); Buffers(const Buffers& vec, int begin, int end); Buffers(const Buffers& vec1, const Buffers& vec2); Buffers(const Buffers& vec, const Vector& map, unsigned n); ~Buffers(); + bool has_meta() const; int get_n(void) const; size_t get_size(void) const; - int get_mem_len(void); + size_t get_meta_size(void) const; + size_t get_mem_len(void); void zero_fill(void); void fill(int i, T value); - void set(int i, T* buf); T* get(int i); const T* get(int i) const; + void get(int buf_id, size_t ele_id, T& hi, T& lo) const; + void set(int buf_id, size_t ele_id, T hi, T lo); + uint8_t* get_meta(int i); + const uint8_t* get_meta(int i) const; + T get_meta(int buf_id, size_t ele_id) const; + void set_meta(int buf_id, size_t ele_id, T val); const std::vector& get_mem() const; + const std::vector& get_meta() const; void set_mem(std::vector* mem); void copy(const Buffers& v); - void copy(int i, T* buf); - void separate_even_odd(); - void separate_even_odd(Buffers& even, Buffers& odd); + void copy(const Buffers& v, int src_id, int dest_id); + void radix2_fft_prepare( + const Buffers& input, + const std::vector& scrambler, + unsigned data_len, + unsigned group_len); + void radix2_fft_inv_prepare(const Buffers& input); + void reset_meta(); friend bool operator==(const Buffers& lhs, const Buffers& rhs); void dump(void); void swap(unsigned i, unsigned j); + /** Calculate meta size given a buffer size + * meta size := ceil(size * sizeof(T) / CHAR_BIT) + * + * @param s - given size, in words + * @return meta size, in bytes + */ + static size_t compute_meta_size(size_t s) + { + assert(s > 0); + + const size_t bytes = s * sizeof(T); + 
size_t r = bytes / CHAR_BIT; + while (r * CHAR_BIT < bytes) { + r++; + } + return r; + }; + + /** Calculate buffer size given a meta_size + * buffer size := ceil(meta_size * CHAR_BIT / sizeof(T)) + * + * @param m_size - given meta size, in bytes + * @return meta size + */ + static size_t compute_size(size_t m_size) + { + assert(m_size > 0); + + const size_t bytes = m_size * CHAR_BIT; + size_t r = bytes / sizeof(T); + while (r * sizeof(T) < bytes) { + r++; + } + return r; + }; + + /** Calculate conventional size of buffers + * Conentional size is the lowest number of words that is at least a given + * `s` and satisfy the following conditions: + * - its bytes is multiple of `size_alignment` + * - the correspondent `meta_size` is multiple of `meta_size_alignment` + * + * @param s - given size, in words + * @param size_alignment - alignment number of output size, in bytes + * @param meta_size_alignment - alignment number of meta size according to + * the out size, in byte + * @return conventional size, in words + */ + static size_t get_conv_size( + size_t s, + size_t size_alignment = 0, + size_t meta_size_alignment = 0) + { + assert(s > 0); + + if (size_alignment == 0) { + size_alignment = simd::ALIGNMENT; + } + + if (meta_size_alignment == 0) { + // it's the `meta_size` for a single element fitting a Register + meta_size_alignment = simd::ALIGNMENT / CHAR_BIT; + } + + // calculate meta_size + size_t m_size = Buffers::compute_meta_size(s); + // calculate new size according to `m_size` + size_t c_size = Buffers::compute_size(m_size); + + while (c_size * sizeof(T) % size_alignment != 0 + || m_size % meta_size_alignment != 0) { + m_size++; + c_size = Buffers::compute_size(m_size); + } + + return c_size; + }; + protected: std::vector mem; - int mem_len; + std::vector meta; + size_t mem_len; size_t size; + size_t meta_size = 0; int n; private: simd::AlignedAllocator allocator; + simd::AlignedAllocator allocator_meta; BufMemAlloc mem_alloc_case = BufMemAlloc::FULL; T* zeros 
= nullptr; + uint8_t* zeros_meta = nullptr; + bool m_meta = false; + + unsigned meta_bits_nb = 0; + T threshold = 0; + T half_element_mask; + T half_meta_mask; + + void init_meta(); + void allocate_meta(bool init_zero = false); }; /** @@ -144,7 +290,7 @@ class Buffers final { * @param size - number of elements of each memory pointed by a pointer of `mem` */ template -Buffers::Buffers(int n, size_t size) +Buffers::Buffers(int n, size_t size, bool has_meta) { this->n = n; this->size = size; @@ -155,6 +301,12 @@ Buffers::Buffers(int n, size_t size) for (int i = 0; i < n; i++) { mem.push_back(this->allocator.allocate(size)); } + + if (has_meta) { + this->m_meta = has_meta; + this->init_meta(); + this->allocate_meta(true); + } } /** @@ -165,13 +317,23 @@ Buffers::Buffers(int n, size_t size) * @param mem - a vector of buffers */ template -Buffers::Buffers(int n, size_t size, const std::vector& mem) +Buffers::Buffers( + int n, + size_t size, + const std::vector& mem, + const std::vector* meta) { this->n = n; this->size = size; this->mem_len = n * size; this->mem_alloc_case = BufMemAlloc::NONE; this->mem = mem; + + if (meta) { + this->m_meta = true; + this->init_meta(); + this->meta = *meta; + } } /** @@ -208,6 +370,22 @@ Buffers::Buffers(const Buffers& vec, int n) std::memset(mem[i], 0, this->size * sizeof(T)); } } + + this->m_meta = vec.has_meta(); + if (this->m_meta) { + this->init_meta(); + this->allocate_meta(); + + for (i = 0; i < copy_len; i++) { + std::copy_n(vec.get_meta(i), meta_size, meta[i]); + } + + if (this->n > vec_n) { // padding zeros + for (i = vec_n; i < this->n; i++) { + std::fill_n(meta[i], meta_size, 0); + } + } + } } /** @@ -246,6 +424,24 @@ Buffers::Buffers(const Buffers& vec, int begin, int end) mem.insert(mem.end(), vec_mem.begin() + begin, vec_mem.end()); mem.insert(mem.end(), end - vec.get_n(), zeros); } + + this->m_meta = vec.has_meta(); + if (this->m_meta) { + const std::vector vec_meta = vec.get_meta(); + this->init_meta(); + 
meta.reserve(this->n); + // slice from input buffers + if (end <= vec.get_n()) { + meta.insert( + meta.end(), vec_meta.begin() + begin, vec_meta.begin() + end); + } else { // slice and padding zeros + this->zeros_meta = this->allocator_meta.allocate(meta_size); + std::fill_n(this->zeros_meta, meta_size, 0); + + meta.insert(meta.end(), vec_meta.begin() + begin, vec_meta.end()); + meta.insert(meta.end(), end - vec.get_n(), zeros_meta); + } + } } /** @@ -258,11 +454,9 @@ template Buffers::Buffers(const Buffers& vec1, const Buffers& vec2) { assert(vec1.get_size() == vec2.get_size()); + assert(vec1.has_meta() == vec2.has_meta()); - int n1 = vec1.get_n(); - int n2 = vec2.get_n(); - - this->n = n1 + n2; + this->n = vec1.get_n() + vec2.get_n(); this->size = vec1.get_size(); this->mem_len = this->n * this->size; @@ -271,6 +465,14 @@ Buffers::Buffers(const Buffers& vec1, const Buffers& vec2) mem.reserve(this->n); mem.insert(mem.end(), vec1.get_mem().begin(), vec1.get_mem().end()); mem.insert(mem.end(), vec2.get_mem().begin(), vec2.get_mem().end()); + + this->m_meta = vec1.has_meta(); + if (this->m_meta) { + this->init_meta(); + meta.reserve(this->n); + meta.insert(meta.end(), vec1.get_meta().begin(), vec1.get_meta().end()); + meta.insert(meta.end(), vec2.get_meta().begin(), vec2.get_meta().end()); + } } /** @@ -324,6 +526,26 @@ Buffers::Buffers( for (unsigned i = 0; i < map_len; ++i) { mem[map.get(i)] = vec_mem[i]; } + + this->m_meta = vec.has_meta(); + if (this->m_meta) { + this->init_meta(); + + const std::vector vec_meta = vec.get_meta(); + // output is sliced & shuffled from `vec` + meta.reserve(this->n); + if (vec_n < n) { // output is zero-extended & shuffled from `vec` + this->zeros_meta = this->allocator_meta.allocate(meta_size); + std::fill_n(this->zeros_meta, meta_size, 0); + + for (unsigned i = 0; i < n; ++i) { + meta.push_back(zeros_meta); + } + } + for (unsigned i = 0; i < map_len; ++i) { + meta[map.get(i)] = vec_meta[i]; + } + } } template @@ -334,12 +556,53 @@ 
Buffers::~Buffers() for (int i = 0; i < n; i++) { this->allocator.deallocate(mem[i], size); } + if (this->m_meta) { + for (int i = 0; i < n; i++) { + this->allocator_meta.deallocate(meta[i], meta_size); + } + } } else if (this->mem_alloc_case == BufMemAlloc::ZERO_EXTEND) { this->allocator.deallocate(this->zeros, size); + if (this->m_meta) { + this->allocator_meta.deallocate(this->zeros_meta, meta_size); + } } } } +template +inline void Buffers::init_meta() +{ + meta_size = Buffers::compute_meta_size(size); + meta_bits_nb = sizeof(T); + threshold = mask(meta_bits_nb); + + half_element_mask = mask(CHAR_BIT * sizeof(T) / 2); + half_meta_mask = mask(meta_bits_nb / 2); + if (half_meta_mask == 0) { + half_meta_mask = 1; + } +} + +template +inline void Buffers::allocate_meta(bool init_zero) +{ + meta.reserve(n); + for (int i = 0; i < n; i++) { + meta.push_back(this->allocator_meta.allocate(meta_size)); + } + + if (init_zero) { + reset_meta(); + } +} + +template +inline bool Buffers::has_meta(void) const +{ + return m_meta; +} + template inline int Buffers::get_n(void) const { @@ -353,7 +616,13 @@ inline size_t Buffers::get_size(void) const } template -inline int Buffers::get_mem_len(void) +inline size_t Buffers::get_meta_size(void) const +{ + return meta_size; +} + +template +inline size_t Buffers::get_mem_len(void) { return mem_len; } @@ -361,39 +630,102 @@ inline int Buffers::get_mem_len(void) template void Buffers::zero_fill(void) { - for (int i = 0; i < n; i++) - std::memset(mem[i], 0, size * sizeof(T)); + for (int i = 0; i < n; i++) { + std::fill_n(mem[i], size, 0); + } + reset_meta(); } template void Buffers::fill(int i, T value) { std::fill_n(mem[i], size, value); + if (m_meta) { + std::fill_n(meta[i], meta_size, 0); + } +} + +template +inline T* Buffers::get(int i) +{ + assert(i >= 0 && i < n); + return mem[i]; } template -inline void Buffers::set(int i, T* buf) +inline const T* Buffers::get(int i) const { assert(i >= 0 && i < n); + return mem[i]; +} + +/** Get 
unpacked `j`th element at the `i`th buffer + * Return two integers `lo` (`hi`) whose + * - low half part is the low (or high) part of the `j`th element + * - high half part is the low (or high) part of the meta of the `j`th element + * + * @param buf_id - index of buffer + * @param ele_id - index of element in the ith buffer + * @param hi - high half part of the unpacked element + * @param lo - low half part of the unpacked element + */ +template +void Buffers::get(int buf_id, size_t ele_id, T& hi, T& lo) const +{ + assert(buf_id >= 0 && buf_id < n); + assert(ele_id >= 0 && ele_id < size); + + T m_value = get_meta(buf_id, ele_id); + T value = mem[buf_id][ele_id]; + const T half = CHAR_BIT * sizeof(T) / 2; + + lo = (value & half_element_mask) | ((m_value & half_meta_mask) << half); - if ((mem_alloc_case == BufMemAlloc::NONE) && (mem[i] != nullptr)) - this->allocator.deallocate(mem[i], size); + value = static_cast(value) >> half; + m_value = static_cast(m_value) >> (meta_bits_nb / 2); - mem[i] = buf; + hi = (value & half_element_mask) | ((m_value & half_meta_mask) << half); } +/** Set `j`th element and its meta, given its unpacked value `lo` (`hi`) whose + * - low half part is the low (or high) part of the `j`th element + * - high half part is the low (or high) part of the meta of the `j`th element + * + * @param buf_id - index of buffer + * @param ele_id - index of element in the ith buffer + * @param hi - high half part of the unpacked element + * @param lo - low half part of the unpacked element + */ template -inline T* Buffers::get(int i) +void Buffers::set(int buf_id, size_t ele_id, T hi, T lo) +{ + assert(buf_id >= 0 && buf_id < n); + assert(ele_id >= 0 && ele_id < size); + + const T half = CHAR_BIT * sizeof(T) / 2; + mem[buf_id][ele_id] = + (lo & half_element_mask) | ((hi & half_element_mask) << half); + + lo = static_cast(lo) >> half; + hi = static_cast(hi) >> half; + T m_value = + (lo & half_meta_mask) | ((hi & half_meta_mask) << (meta_bits_nb / 2)); + + 
set_meta(buf_id, ele_id, m_value); +} + +template +inline uint8_t* Buffers::get_meta(int i) { assert(i >= 0 && i < n); - return mem[i]; + return meta[i]; } template -inline const T* Buffers::get(int i) const +inline const uint8_t* Buffers::get_meta(int i) const { assert(i >= 0 && i < n); - return mem[i]; + return meta[i]; } template @@ -403,70 +735,162 @@ inline const std::vector& Buffers::get_mem() const } template -inline void Buffers::set_mem(std::vector* mem) +inline const std::vector& Buffers::get_meta() const { - this->mem = mem; + return meta; } +/** Get meta value of the `j`th element at the `i`th buffer + * + * @param buf_id - index of buffer + * @param ele_id - index of element in the ith buffer + * @return meta value + */ template -void Buffers::copy(const Buffers& v) +T Buffers::get_meta(int buf_id, size_t ele_id) const { - assert(v.get_n() == n); - assert(v.get_size() <= size); - size_t v_size = v.get_size(); - for (int i = 0; i < n; i++) - std::copy_n(v.get(i), v_size, mem[i]); + assert(buf_id >= 0 && buf_id < n); + assert(ele_id >= 0 && ele_id < size); + + const uint8_t* meta_arr = meta[buf_id]; + + const size_t bits_nb = ele_id * meta_bits_nb; + // begin meta + const size_t begin_id = bits_nb / CHAR_BIT; + // end meta, inclusively + const size_t end_id = (bits_nb + meta_bits_nb - 1) / CHAR_BIT; + // bit offset at the first meta + const size_t begin_offset = bits_nb % CHAR_BIT; + + // get from the 1st meta + T val = threshold & (static_cast(meta_arr[begin_id]) >> begin_offset); + + // get from next metas, before the last one + for (size_t i = 1; i < end_id - begin_id; ++i) { + const size_t j = i + begin_id; + val |= static_cast(meta_arr[j]) + << (CHAR_BIT - begin_offset + i * CHAR_BIT); + } + // get from the last meta + if (end_id > begin_id) { + const size_t end = (begin_offset + meta_bits_nb) % CHAR_BIT; + val |= mask(end) & meta_arr[end_id]; + } + + return val; +} + +/** Set meta value of the `j`th element at the `i`th buffer + * + * @param buf_id 
- index of buffer + * @param ele_id - index of element in the ith buffer + * @param val - meta value + */ +template +void Buffers::set_meta(int buf_id, size_t ele_id, T val) +{ + assert(buf_id >= 0 && buf_id < n); + assert(ele_id >= 0 && ele_id < size); + + uint8_t* meta_arr = meta[buf_id]; + + const size_t bits_nb = ele_id * meta_bits_nb; + // begin meta + const size_t begin_id = bits_nb / CHAR_BIT; + // end meta, inclusively + const size_t end_id = (bits_nb + meta_bits_nb - 1) / CHAR_BIT; + // bit offset at the first meta + const size_t begin_offset = bits_nb % CHAR_BIT; + + // set bits of the 1st meta + size_t nb = CHAR_BIT > meta_bits_nb + begin_offset + ? meta_bits_nb + : CHAR_BIT - begin_offset; + set_bits(val, meta_arr[begin_id], nb, begin_offset); + + val = static_cast(val) >> nb; + + // set bits of next metas, before the last one + for (size_t i = 1; i < end_id - begin_id; ++i) { + const size_t j = i + begin_id; + set_bits(val, meta_arr[j], CHAR_BIT); + val = static_cast(val) >> CHAR_BIT; + } + + // get from the last meta + if (end_id > begin_id) { + const size_t end = (begin_offset + meta_bits_nb) % CHAR_BIT; + set_bits(val, meta_arr[end_id], end); + } } template -void Buffers::copy(int i, T* buf) +inline void Buffers::set_mem(std::vector* mem) { - std::copy_n(buf, this->size, mem[i]); + this->mem = mem; } template -void Buffers::separate_even_odd() +void Buffers::copy(const Buffers& v) { - std::vector _mem(n, nullptr); - int half = n / 2; - int j = 0; - int i; - for (i = 0; i < n; i += 2) { - _mem[j] = get(i); // even - _mem[j + half] = get(i + 1); // odd - j++; + assert(v.get_n() == n); + assert(v.get_size() <= size); + size_t v_size = v.get_size(); + for (int i = 0; i < n; ++i) { + std::copy_n(v.get(i), v_size, mem[i]); } - for (i = 0; i < n; i++) { - mem[i] = _mem[i]; + if (v.has_meta()) { + meta_size = v.get_meta_size(); + for (int i = 0; i < n; ++i) { + std::copy_n(v.get_meta(i), meta_size, meta[i]); + } } - _mem.shrink_to_fit(); } template -void 
Buffers::separate_even_odd(Buffers& even, Buffers& odd) +void Buffers::copy(const Buffers& v, int src_id, int dest_id) { - for (int i = 0, j = 0; i < n; i += 2, ++j) { - even.set(j, get(i)); - odd.set(j, get(i + 1)); + assert(m_meta == v.has_meta()); + + std::copy_n(v.get(src_id), size, mem[dest_id]); + + if (m_meta) { + std::copy_n(v.get_meta(src_id), meta_size, meta[dest_id]); } } template bool operator==(const Buffers& lhs, const Buffers& rhs) { - if (lhs.n != rhs.n || lhs.size != rhs.size) { + if (lhs.n != rhs.n || lhs.get_size() != rhs.get_size() + || lhs.get_meta_size() != rhs.get_meta_size() + || lhs.has_meta() != rhs.has_meta()) { return false; } for (int i = 0; i < lhs.n; i++) { const T* lhs_vec = lhs.get(i); const T* rhs_vec = rhs.get(i); - for (size_t j = 0; j < lhs.size; j++) { + for (size_t j = 0; j < lhs.get_size(); j++) { if (lhs_vec[j] != rhs_vec[j]) { return false; } } } + + if (lhs.has_meta()) { + for (int i = 0; i < lhs.n; i++) { + const uint8_t* lhs_meta = lhs.get_meta(i); + const uint8_t* rhs_meta = rhs.get_meta(i); + + for (size_t j = 0; j < lhs.get_meta_size(); j++) { + if (lhs_meta[j] != rhs_meta[j]) { + return false; + } + } + } + } + return true; } @@ -475,6 +899,83 @@ void Buffers::swap(unsigned i, unsigned j) { using std::swap; swap(mem[i], mem[j]); + if (m_meta) { + swap(meta[i], meta[j]); + } +} + +/** Perform a preparatiion in radix-2 FFT algorithm + * + * @param input - input buffers + * @param scrambler - a vector stores bit-reversed values + * @param data_len - a conven length + * @param group_len - number of elements in which a same operations will be + * performed + */ +template +void Buffers::radix2_fft_prepare( + const Buffers& input, + const std::vector& scrambler, + unsigned data_len, + unsigned group_len) +{ + const unsigned input_len = input.get_n(); + const std::vector& i_mem = input.get_mem(); + const std::vector& i_meta = input.get_meta(); + + for (unsigned idx = 0; idx < input_len; ++idx) { + // set output = 
scramble(input), i.e. bit reversal ordering + for (unsigned i = scrambler[idx]; i < scrambler[idx] + group_len; ++i) { + std::copy_n(i_mem[idx], size, mem[i]); + m_meta ? std::copy_n(i_meta[idx], meta_size, meta[i]) : nullptr; + } + } + for (unsigned idx = input_len; idx < data_len; ++idx) { + // set output = scramble(input), i.e. bit reversal ordering + for (unsigned i = scrambler[idx]; i < scrambler[idx] + group_len; ++i) { + std::fill_n(mem[i], size, 0); + m_meta ? std::fill_n(meta[i], meta_size, 0) : nullptr; + } + } +} + +/** Perform a preparatiion in radix-2 inverse FFT algorithm + * + * @param input - input buffers + * performed + */ +template +void Buffers::radix2_fft_inv_prepare(const Buffers& input) +{ + const unsigned len = this->n; + const unsigned input_len = input.get_n(); + const std::vector& i_mem = input.get_mem(); + const std::vector& i_meta = input.get_meta(); + + unsigned i; + for (i = 0; i < input_len; ++i) { + std::copy_n(i_mem[i], size, mem[i]); + m_meta ? std::copy_n(i_meta[i], meta_size, meta[i]) : nullptr; + } + + if (input_len < len) { + const unsigned input_len_power_2 = arith::ceil2(input_len); + for (; i < input_len_power_2; ++i) { + std::fill_n(mem[i], size, 0); + m_meta ? 
std::fill_n(meta[i], meta_size, 0) : nullptr; + } + } +} + +/// Reset meta buffers +template +inline void Buffers::reset_meta() +{ + if (m_meta) { + for (int i = 0; i < n; i++) { + std::fill_n(meta[i], meta_size, 0); + } + } } template @@ -489,7 +990,23 @@ void Buffers::dump(void) std::cout << (get(i))[size - 1]; } } - std::cout << "\n)\n"; + + if (m_meta) { + std::cout << "\nMeta:\n"; + + for (int i = 0; i < n; i++) { + std::cout << "\n\t" << i << ": "; + for (size_t j = 0; j < meta_size - 1; j++) { + std::cout << static_cast((get_meta(i))[j]) << "-"; + } + if (size > 0) { + std::cout << static_cast( + (get_meta(i))[meta_size - 1]); + } + } + } + + std::cout << "\n\n"; } } // namespace vec diff --git a/test/buffers_utest.cpp b/test/buffers_utest.cpp index 26dcf181..7a939ac4 100644 --- a/test/buffers_utest.cpp +++ b/test/buffers_utest.cpp @@ -37,10 +37,22 @@ namespace vec = quadiron::vec; namespace gf = quadiron::gf; +template +bool is_all_zeros(const T* buf, size_t len) +{ + for (size_t i = 0; i < len; ++i) { + if (buf[i] != 0) { + return false; + } + } + return true; +} + template class BuffersTest : public ::testing::Test { public: quadiron::simd::AlignedAllocator allocator; + quadiron::simd::AlignedAllocator allocator_meta; BuffersTest() { @@ -49,19 +61,27 @@ class BuffersTest : public ::testing::Test { ~BuffersTest() = default; std::unique_ptr> - gen_buffers_rand_data(int n, int size, int _max = 0) + gen_buffers_rand_data(int n, int size, bool has_meta = false, int _max = 0) { - T max_val = 65537; - const int max = (_max == 0) ? max_val : _max; - std::uniform_int_distribution dis(0, max - 1); - auto vec = std::make_unique>(n, size); + const T max = (_max == 0) ? 
std::numeric_limits::max() : _max; + std::uniform_int_distribution dis(0, max - 1); + auto vec = std::make_unique>(n, size, has_meta); + const std::vector mem = vec->get_mem(); for (int i = 0; i < n; i++) { - T* buf = this->allocator.allocate(size); for (int j = 0; j < size; j++) { - buf[j] = dis(quadiron::prng()); + mem[i][j] = dis(quadiron::prng()); + } + } + + if (has_meta) { + const std::vector meta = vec->get_meta(); + const size_t meta_size = vec->get_meta_size(); + for (int i = 0; i < n; ++i) { + for (size_t j = 0; j < meta_size; ++j) { + meta[i][j] = static_cast(dis(quadiron::prng())); + } } - vec->set(i, buf); } return vec; @@ -85,21 +105,6 @@ class BuffersTest : public ::testing::Test { return vec; } - bool check_eq(const T* buf1, const T* buf2, size_t len) - { - return memcmp(buf1, buf2, len * sizeof(T)) == 0; - } - - bool check_all_zeros(const T* buf, size_t len) - { - for (size_t i = 0; i < len; ++i) { - if (buf[i] != 0) { - return false; - } - } - return true; - } - bool check_shuffled_bufs( const vec::Buffers& input, const vec::Buffers& output, @@ -114,7 +119,17 @@ class BuffersTest : public ::testing::Test { std::vector check(output.get_n(), false); for (unsigned i = 0; i < map_len; ++i) { - if (!check_eq(input.get(i), output.get(map.get(i)), size)) { + if (!std::equal( + input.get(i), + input.get(i) + size, + output.get(map.get(i)))) { + return false; + } + if (input.has_meta() + && !std::equal( + input.get_meta(i), + input.get_meta(i) + input.get_meta_size(), + output.get_meta(map.get(i)))) { return false; } check[map.get(i)] = true; @@ -124,7 +139,12 @@ class BuffersTest : public ::testing::Test { if (output_len > input_len) { for (size_t i = 0; i < output_len; ++i) { if (!check[i]) { - if (!check_all_zeros(output.get(i), size)) { + if (!is_all_zeros(output.get(i), size)) { + return false; + } + if (input.has_meta() + && !is_all_zeros( + output.get_meta(i), output.get_meta_size())) { return false; } } @@ -135,97 +155,88 @@ class BuffersTest : 
public ::testing::Test { } }; -using TestedTypes = ::testing::Types; +using TestedTypes = ::testing::Types; TYPED_TEST_CASE(BuffersTest, TestedTypes); TYPED_TEST(BuffersTest, TestConstructors) // NOLINT { - const int n = 16; - const int begin = 5; - const int end = 12; - const int size = 4; + const std::vector tests = {true, false}; - auto vec1 = this->gen_buffers_rand_data(n, size); - vec::Buffers vec2(*vec1, begin, end); + for (bool const& has_meta : tests) { + const int n = 16; + const int begin = 5; + const int end = 12; + const int size = 4; - const std::vector mem1 = vec1->get_mem(); - const std::vector mem2 = vec2.get_mem(); + auto vec1 = this->gen_buffers_rand_data(n, size, has_meta); + ASSERT_EQ(has_meta, vec1->has_meta()); - ASSERT_EQ(vec2.get_n(), end - begin); - ASSERT_EQ(vec2.get_size(), vec1->get_size()); + const size_t meta_size = vec1->get_meta_size(); - for (int i = 0; i < end - begin; i++) { - for (int j = 0; j < size; j++) { - mem2.at(i)[j] = mem1.at(i + begin)[j]; - } - } + vec::Buffers vec2(*vec1, begin, end); - std::vector mem3(end - begin, nullptr); - for (int i = 0; i < end - begin; i++) { - mem3[i] = mem1.at(i + begin); - } - vec::Buffers vec3(end - begin, size, mem3); + const std::vector mem1 = vec1->get_mem(); + const std::vector mem2 = vec2.get_mem(); - ASSERT_EQ(vec2, vec3); + // Check Slice constructor + ASSERT_EQ(vec2.get_n(), end - begin); + ASSERT_EQ(vec2.get_size(), vec1->get_size()); + for (int i = begin, j = 0; i < end; ++i, ++j) { + ASSERT_TRUE(std::equal(mem1[i], mem1[i] + size, mem2[j])); + } - auto gf(gf::create>(65537)); + if (has_meta) { + ASSERT_EQ(vec2.has_meta(), vec1->has_meta()); + ASSERT_EQ(vec2.get_meta_size(), meta_size); - // no-extension - const int out_n_1 = n - 5; - auto map_1 = this->gen_rand_vector(gf, out_n_1, out_n_1); - vec::Buffers vec4(*vec1, *map_1, out_n_1); - ASSERT_TRUE(this->check_shuffled_bufs(*vec1, vec4, *map_1)); + const std::vector meta1 = vec1->get_meta(); + const std::vector meta2 = 
vec2.get_meta(); - // extension - const int out_n_2 = n + 10; - auto map_2 = this->gen_rand_vector(gf, n, out_n_2); - vec::Buffers vec5(*vec1, *map_2, out_n_2); - ASSERT_TRUE(this->check_shuffled_bufs(*vec1, vec5, *map_2)); -} + for (int i = begin, j = 0; i < end; ++i, ++j) { + ASSERT_TRUE( + std::equal(meta1[i], meta1[i] + meta_size, meta2[j])); + } + } -TYPED_TEST(BuffersTest, TestEvenOddSeparation) // NOLINT -{ - const int n = 8; - const int size = 32; - const int half = n / 2; - auto vec1 = this->gen_buffers_rand_data(n, size); - vec::Buffers vec2(n, size); - - vec2.copy(*vec1); - - std::vector* even_mem = - new std::vector(half, nullptr); - std::vector* odd_mem = - new std::vector(half, nullptr); - vec::Buffers i_even(half, size, *even_mem); - vec::Buffers i_odd(half, size, *odd_mem); - vec1->separate_even_odd(i_even, i_odd); - - vec1->separate_even_odd(); - - vec::Buffers _i_even(*vec1, 0, half); - vec::Buffers _i_odd(*vec1, half, n); - ASSERT_EQ(i_even, _i_even); - ASSERT_EQ(i_odd, _i_odd); - - const std::vector mem1 = vec1->get_mem(); - const std::vector mem2 = vec2.get_mem(); - - bool ok = true; - for (int i = 0; i < n / 2; i += 2) { - TypeParam* even1 = mem1.at(i); - TypeParam* even2 = mem2.at(i * 2); - TypeParam* odd1 = mem1.at(i + n / 2); - TypeParam* odd2 = mem2.at(i * 2 + 1); - for (int j = 0; j < size; j++) { - if (even1[j] != even2[j] || odd1[j] != odd2[j]) { - ok = false; - i = n; - break; + // Check constructor from given mem and meta + std::vector mem3(end - begin); + for (int i = 0; i < end - begin; i++) { + mem3[i] = this->allocator.allocate(size); + std::copy_n(mem1[i + begin], size, mem3[i]); + } + + if (has_meta) { + const std::vector meta1 = vec1->get_meta(); + std::vector meta3(end - begin); + for (int i = 0; i < end - begin; i++) { + meta3[i] = this->allocator_meta.allocate(meta_size); + std::copy_n(meta1[i + begin], meta_size, meta3[i]); } + + vec::Buffers vec3(end - begin, size, mem3, &meta3); + + ASSERT_EQ(vec2, vec3); + + } else { + 
vec::Buffers vec3(end - begin, size, mem3); + + ASSERT_EQ(vec2, vec3); } + + auto gf(gf::create>(static_cast(31))); + + // no-extension + const int out_n_1 = n - 5; + auto map_1 = this->gen_rand_vector(gf, out_n_1, out_n_1); + vec::Buffers vec4(*vec1, *map_1, out_n_1); + ASSERT_TRUE(this->check_shuffled_bufs(*vec1, vec4, *map_1)); + + // extension + const int out_n_2 = n + 10; + auto map_2 = this->gen_rand_vector(gf, n, out_n_2); + vec::Buffers vec5(*vec1, *map_2, out_n_2); + ASSERT_TRUE(this->check_shuffled_bufs(*vec1, vec5, *map_2)); } - ASSERT_TRUE(ok); } TYPED_TEST(BuffersTest, TestZeroExtented) // NOLINT @@ -235,23 +246,32 @@ TYPED_TEST(BuffersTest, TestZeroExtented) // NOLINT const int n1 = 4; const int n2 = 10; - auto vec = this->gen_buffers_rand_data(n, size); - vec::Buffers vec1(*vec, n1); - vec::Buffers vec2(*vec, n2); + const std::vector tests = {true, false}; - vec::Buffers _vec1(*vec, 0, n1); - vec::Buffers _vec2(*vec, n2); + for (bool const& has_meta : tests) { + auto vec = this->gen_buffers_rand_data(n, size, has_meta); + // cloned constructor: no zero-padding as `n1 < n` + vec::Buffers vec1(*vec, n1); + // cloned constructor: there is zero-padding as `n2 > n` + vec::Buffers vec2(*vec, n2); - ASSERT_EQ(vec1, _vec1); - ASSERT_EQ(vec2, _vec2); + // slice from `vec` as `n1 < n` + vec::Buffers _vec1(*vec, 0, n1); + // slice and zero-extended from `vec` as `n2 > n` + vec::Buffers _vec2(*vec, 0, n2); - vec::Buffers vec3(vec2, n1); - ASSERT_EQ(vec3, vec1); + ASSERT_EQ(vec1, _vec1); + ASSERT_EQ(vec2, _vec2); + + vec::Buffers vec3(vec2, n); + ASSERT_EQ(vec3, *vec); + } } TYPED_TEST(BuffersTest, TestPackUnpack) // NOLINT { const int iter_count = quadiron::arith::log2(sizeof(TypeParam)); + const std::vector tests = {true, false}; for (int i = 0; i <= iter_count; i++) { const int word_size = quadiron::arith::exp2(i); @@ -259,41 +279,291 @@ TYPED_TEST(BuffersTest, TestPackUnpack) // NOLINT const int size = 32; const int bytes_size = size * word_size; const
TypeParam max = (static_cast(1) << word_size) + 1; - auto words = this->gen_buffers_rand_data(n, size, max); - const std::vector mem_T = words->get_mem(); - - // Pack manually from TypeParam to uint8_t. - vec::Buffers vec_char(n, bytes_size); - const std::vector mem_char = vec_char.get_mem(); - for (int j = 0; j < n; j++) { - int t = 0; - TypeParam* buf_T = mem_T.at(j); - uint8_t* buf_char = mem_char.at(j); - - for (int k = 0; k < size; k++) { - TypeParam symb = buf_T[k]; - buf_char[t] = static_cast(symb & 0xFF); - - t++; - for (int u = 1; u < word_size; u++) { - symb >>= 8; + + for (bool const& has_meta : tests) { + auto words = this->gen_buffers_rand_data(n, size, has_meta, max); + + const std::vector mem_T = words->get_mem(); + + // Pack manually from TypeParam to uint8_t. + vec::Buffers vec_char(n, bytes_size); + const std::vector mem_char = vec_char.get_mem(); + for (int j = 0; j < n; j++) { + int t = 0; + TypeParam* buf_T = mem_T.at(j); + uint8_t* buf_char = mem_char.at(j); + + for (int k = 0; k < size; k++) { + TypeParam symb = buf_T[k]; buf_char[t] = static_cast(symb & 0xFF); + t++; + for (int u = 1; u < word_size; u++) { + symb = static_cast(symb) >> 8; + buf_char[t] = static_cast(symb & 0xFF); + t++; + } } } + + // Pack bufs of type uint8_t to bufs of type TypeParam. + vec::Buffers vec_T_tmp(n, size); + const std::vector mem_T_tmp = vec_T_tmp.get_mem(); + + vec::pack( + mem_char, mem_T_tmp, n, size, word_size); + + // Unpack bufs of type TypeParam to bufs of type uint8_t. + vec::Buffers vec_char_tmp(n, bytes_size); + const std::vector mem_char_tmp = vec_char_tmp.get_mem(); + vec::unpack( + mem_T_tmp, mem_char_tmp, n, size, word_size); + + for (int i = 0; i < n; ++i) { + ASSERT_TRUE( + std::equal(mem_T_tmp[i], mem_T_tmp[i] + size, mem_T[i])); + ASSERT_TRUE(std::equal( + mem_char_tmp[i], + mem_char_tmp[i] + bytes_size, + mem_char[i])); + } } + } +} + +TYPED_TEST(BuffersTest, TestCalculateSize) // NOLINT +{ + // Conventional size depends on type of word, i.e.
TypeParam, hence we use an + // array for expected values. + typedef struct tuple { + size_t size; // in TypeParam words + size_t size_alignment; // in bytes + size_t meta_size_alignment; // in bytes + size_t expected[4]; // 4 values for 4 TypeParams [uint8_t, uint16_t, + // uint32_t, uint64_t] + } tuple; + + std::vector cases = { + // sse + {12, 16, 2, {16, 16, 12, 12}}, + {25, 16, 2, {32, 32, 28, 26}}, + // avx + {12, 32, 4, {32, 16, 16, 12}}, + {25, 32, 4, {32, 32, 32, 28}}, + // whatever + {11, 15, 6, {240, 120, 60, 30}}, + }; + + for (auto const& t : cases) { + const size_t new_size = vec::Buffers::get_conv_size( + t.size, t.size_alignment, t.meta_size_alignment); + const size_t id = quadiron::arith::log2(sizeof(TypeParam)); + + ASSERT_EQ(t.expected[id], new_size); + } +} + +TYPED_TEST(BuffersTest, TestGetSetValueAndMeta) // NOLINT +{ + const int n = 2; + const size_t size = 4; + const size_t meta_size = vec::Buffers::compute_meta_size(size); + + ASSERT_EQ( + meta_size, + (sizeof(TypeParam) == 1) ? 1 : size * sizeof(TypeParam) / CHAR_BIT); + + // vector of meta buffers, as `meta_size` depends on TypeParam + std::vector> compacted_metas = { + // for TypeParam = uint8_t + {0b1010, 0b1111}, + // for TypeParam = uint16_t + {0b10101111, 0b11111010}, + // for TypeParam = uint32_t + {0b10101111, 0b11111010, 0b10101111, 0b11111010}, + // for TypeParam = uint64_t + {0b10101111, + 0b11111010, + 0b10101111, + 0b11111010, + 0b10101111, + 0b11111010, + 0b10101111, + 0b11111010}, + }; + + // vector stores expected meta per element.
These values are with respect to + // `compacted_metas` + std::vector> expanded_metas = { + // for TypeParam = uint8_t: each element has a meta of 1 bit + {0b0, 0b1, 0b0, 0b1, 0b1, 0b1, 0b1, 0b1}, + // for TypeParam = uint16_t: each element has a meta of 2 bits + {0b11, 0b11, 0b10, 0b10, 0b10, 0b10, 0b11, 0b11}, + // for TypeParam = uint32_t: each element has a meta of 4 bits + {0b1111, 0b1010, 0b1010, 0b1111, 0b1111, 0b1010, 0b1010, 0b1111}, + // for TypeParam = uint64_t: each element has a meta of 8 bits + {0b10101111, + 0b11111010, + 0b10101111, + 0b11111010, + 0b10101111, + 0b11111010, + 0b10101111, + 0b11111010}, + }; + + // data buffers + std::vector> packed_values = { + // for TypeParam = uint8_t + {0x42, 0x51, 0x19, 0x3a, 0xab, 0xdf, 0x1c, 0xa1}, + // for TypeParam = uint16_t + {static_cast(0x4142), + static_cast(0x5051), + static_cast(0x1819), + static_cast(0x393a), + static_cast(0xaaab), + static_cast(0xdedf), + static_cast(0x1b1c), + static_cast(0xa0a1)}, + // for TypeParam = uint32_t + {static_cast(0x41414242), + static_cast(0x50505151), + static_cast(0x18181919), + static_cast(0x39393a3a), + static_cast(0xaaaaabab), + static_cast(0xdededfdf), + static_cast(0x1b1b1c1c), + static_cast(0xa0a0a1a1)}, + // for TypeParam = uint64_t + {static_cast(0x4141414142424242), + static_cast(0x1414141451515151), + static_cast(0x1818181819191919), + static_cast(0x393939393a3a3a3a), + static_cast(0xb9b9b9b9abababab), + static_cast(0xdededededfdfdfdf), + static_cast(0x1b1b1b1b1c1c1c1c), + static_cast(0xa1a0a0a0a1a1a1a1)}, + }; + + typedef struct unpack { + TypeParam hi; + TypeParam lo; + } unpack; + + // vector stores expected unpacked elements + std::vector> unpacked_values = { + // for TypeParam = uint8_t: for each half part + // - first 4 bits from meta + // - last 4 bits from data + {{0x04, 0x02}, + {0x15, 0x11}, + {0x01, 0x09}, + {0x13, 0x1a}, + {0x1a, 0x1b}, + {0x1d, 0x1f}, + {0x11, 0x1c}, + {0x1a, 0x11}}, + // for TypeParam = uint16_t: for each half part + // - first 8 bits from
meta + // - last 8 bits from data + {{static_cast(0x0141), static_cast(0x0142)}, + {static_cast(0x0150), static_cast(0x0151)}, + {static_cast(0x0118), static_cast(0x0019)}, + {static_cast(0x0139), static_cast(0x003a)}, + {static_cast(0x01aa), static_cast(0x00ab)}, + {static_cast(0x01de), static_cast(0x00df)}, + {static_cast(0x011b), static_cast(0x011c)}, + {static_cast(0x01a0), static_cast(0x01a1)}}, + // for TypeParam = uint32_t: for each half part + // - first 16 bits from meta + // - last 16 bits from data + {{static_cast(0x34141), static_cast(0x34242)}, + {static_cast(0x25050), static_cast(0x25151)}, + {static_cast(0x21818), static_cast(0x21919)}, + {static_cast(0x33939), static_cast(0x33a3a)}, + {static_cast(0x3aaaa), static_cast(0x3abab)}, + {static_cast(0x2dede), static_cast(0x2dfdf)}, + {static_cast(0x21b1b), static_cast(0x21c1c)}, + {static_cast(0x3a0a0), static_cast(0x3a1a1)}}, + // for TypeParam = uint64_t: for each half part + // - first 32 bits from meta + // - last 32 bits from data + {{static_cast(0xa41414141), + static_cast(0xf42424242)}, + {static_cast(0xf14141414), + static_cast(0xa51515151)}, + {static_cast(0xa18181818), + static_cast(0xf19191919)}, + {static_cast(0xf39393939), + static_cast(0xa3a3a3a3a)}, + {static_cast(0xab9b9b9b9), + static_cast(0xfabababab)}, + {static_cast(0xfdededede), + static_cast(0xadfdfdfdf)}, + {static_cast(0xa1b1b1b1b), + static_cast(0xf1c1c1c1c)}, + {static_cast(0xfa1a0a0a0), + static_cast(0xaa1a1a1a1)}}, + }; + + const size_t id = quadiron::arith::log2(sizeof(TypeParam)); + + const std::vector mem = {packed_values[id].data(), + packed_values[id].data() + size}; + const std::vector meta = {compacted_metas[id].data(), + compacted_metas[id].data() + meta_size}; + + // For checking get functions + vec::Buffers buf1(n, size, mem, &meta); + + // check get_meta + for (int i = 0; i < n; ++i) { + for (size_t j = 0; j < size; ++j) { + const TypeParam got = buf1.get_meta(i, j); + const TypeParam expected = expanded_metas[id][i 
* size + j]; + ASSERT_EQ(expected, got); + } + } - // Pack bufs of type uint8_t to bufs of type TypeParam. - vec::Buffers vec_T_tmp(n, size); - const std::vector mem_T_tmp = vec_T_tmp.get_mem(); - vec::pack(mem_char, mem_T_tmp, n, size, word_size); - ASSERT_EQ(vec_T_tmp, *words); - - // Unpack bufs of type TypeParam to bufs of type uint8_t. - vec::Buffers vec_char_tmp(n, bytes_size); - const std::vector mem_char_tmp = vec_char_tmp.get_mem(); - vec::unpack( - mem_T_tmp, mem_char_tmp, n, size, word_size); - ASSERT_EQ(vec_char_tmp, vec_char); + // check get unpacked elements + for (int i = 0; i < n; ++i) { + for (size_t j = 0; j < size; ++j) { + TypeParam hi, lo; + buf1.get(i, j, hi, lo); + const unpack expected = unpacked_values[id][i * size + j]; + ASSERT_EQ(expected.hi, hi); + ASSERT_EQ(expected.lo, lo); + } + } + + // For checking set functions + vec::Buffers buf2(n, size, true); + + // set meta & check + for (int i = 0; i < n; ++i) { + for (size_t j = 0; j < size; ++j) { + const TypeParam m_val = expanded_metas[id][i * size + j]; + buf2.set_meta(i, j, m_val); + ASSERT_EQ(m_val, buf2.get_meta(i, j)); + } + const uint8_t* got = buf2.get_meta(i); + const uint8_t* expected = compacted_metas[id].data(); + ASSERT_EQ(compacted_metas[id].size(), n * meta_size); + ASSERT_TRUE(std::equal(got, got + meta_size, expected + i * meta_size)); + } + + // set unpack element & check + for (int i = 0; i < n; ++i) { + for (size_t j = 0; j < size; ++j) { + unpack p = unpacked_values[id][i * size + j]; + buf2.set(i, j, p.hi, p.lo); + TypeParam hi, lo; + buf2.get(i, j, hi, lo); + ASSERT_EQ(p.hi, hi); + ASSERT_EQ(p.lo, lo); + } + const TypeParam* got = buf2.get(i); + const TypeParam* expected = packed_values[id].data(); + ASSERT_TRUE(std::equal(got, got + size, expected + i * size)); } }