Main Page   Reference Manual   Compound List   File List  

libecc/polynomial.h

Go to the documentation of this file.
00001 //
00012 //
00013 // This file is part of the libecc package.
00014 // Copyright (C) 2002 - 2004 by
00015 //
00016 // Carlo Wood, Run on IRC <carlo@alinoe.com>
00017 // RSA-1024 0x624ACAD5 1997-01-26                    Sign & Encrypt
00018 // Fingerprint16 = 32 EC A7 B6 AC DB 65 A6  F6 F6 55 DD 1C DC FF 61
00019 //
00020 // This program is free software; you can redistribute it and/or
00021 // modify it under the terms of the GNU General Public License
00022 // as published by the Free Software Foundation; either version 2
00023 // of the License, or (at your option) any later version.
00024 //
00025 // This program is distributed in the hope that it will be useful,
00026 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00027 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00028 // GNU General Public License for more details.
00029 //
00030 // You should have received a copy of the GNU General Public License
00031 // along with this program; if not, write to the Free Software
00032 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00033 //
00034 
00035 #ifndef LIBECC_POLYNOMIAL_H
00036 #define LIBECC_POLYNOMIAL_H
00037 
00038 #include <stdexcept>
00039 #include <libecc/bitset.h>
00040 #include <libecc/debug.h>
00041 #if ECC_DEBUGOUTPUT
00042 #include <libcwd/cwprint.h>
00043 #endif
00044 
00045 #if ECC_DEBUG
00046 #define LIBECC_AUGMENTED 1
00047 #define LIBECC_INPLACE (1 || !LIBECC_AUGMENTED)
00048 #define LIBECC_SWAPCOLUMNS (1 || LIBECC_INPLACE)
00049 #else
00050 // Don't change these.
00051 #define LIBECC_AUGMENTED 0
00052 #define LIBECC_INPLACE 1
00053 #define LIBECC_SWAPCOLUMNS 1
00054 #endif
00055 
00056 namespacelibecc {
00057 
00058 // Forward declarations.
00059 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00060   classpolynomial;
00061 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00062   polynomial<m, k, k1, k2> operator*(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00063 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00064   polynomial<m, k, k1, k2> operator/(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00065 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00066   bool operator==(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00067 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00068   bool operator!=(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00069 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00070   std::ostream& operator<<(std::ostream&, polynomial<m, k, k1, k2> const&);
00071 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00072   std::ostream& operator<<(std::ostream&, typename polynomial<m, k, k1, k2>::xor_type const&);
00073 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00074   typename polynomial<m, k, k1, k2>::xor_type operator+(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00075 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00076   typename polynomial<m, k, k1, k2>::xor_type operator-(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00077 
00091 template<unsigned int m, unsigned int k, unsigned int k1 = 0, unsigned int k2 = 0>
00092   classpolynomial {
00093     public:
00097       typedef Operator::bitsetExpression<m, false, false, Operator::bitsetXOR> xor_type;
00098 
00099       // Fix this if you add members in front of M_coefficients.
00100       static size_t const offsetof_vector = bitset<m>::offsetof_vector;
00101 
00102     private:
00103       bitset<m> M_coefficients;
00104       static polynomial<m, k, k1, k2> const one;
00105       static bool S_normal_initialized;
00106       static bitset<m> S_normal;
00107 
00108     public:
00112       static polynomial const& unity(void) { return one; }
00113 
00114     public:
00118       polynomial(void) { }
00119 
00123       explicit polynomial(bitset_digit_t coefficients) : M_coefficients(coefficients) { }
00124 
00128       polynomial(polynomial const& p) : M_coefficients(p.M_coefficients) { }
00129 
00133       explicit polynomial(bitset<m> const& coefficients) : M_coefficients(coefficients) { }
00134 
00138       polynomial(std::string const& coefficients) : M_coefficients(coefficients) { }
00139 
00180       polynomial(xor_type const& expression) : M_coefficients(expression) { }
00181 
00185       polynomial& operator=(polynomial const& p) { M_coefficients = p.M_coefficients; return *this; }
00186 
00190       polynomial& operator=(bitset<m> const& coefficients) { M_coefficients = coefficients; return *this; }
00191 
00196       polynomial& operator=(xor_type const& expression);
00197 
00201       polynomial(polynomial const& b, polynomial const& c);
00202 
00206       static unsigned int const square_digits = 2 * bitset_base<m>::digits + 4;
00207 
00223       polynomial& square(bitset_digit_t* tmpbuf) const; // tmpbuf must be an array of `square_digits' bitset_digit_t.
00224 
00232       bool sqrt(void);
00233 
00234       // The field arithmetic is implemented in terms of operations on the bits.
00238       polynomial& operator+=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00239 
00243       polynomial& operator-=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00244 
00248       polynomial& operator*=(polynomial const& p);
00249 #ifdef LIBECC_DOXYGEN
00250       // Stupid doxygen.
00262       polynomial& operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00263 #else
00264       // The real prototype.
00265       polynomial& operator*=(xor_type const& expr);
00266 #endif
00267 
00271       polynomial& operator/=(polynomial const& p);
00272 #ifdef LIBECC_DOXYGEN
00273       // Stupid doxygen.
00285       polynomial& operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00286 #else
00287       // The real prototype.
00288       polynomial& operator/=(xor_type const& expr);
00289 #endif
00290 
00299       static bitset<m> const& normal(void) { if (!S_normal_initialized) calculate_normal(); return S_normal; }
00300 
00312       int trace(void) const
00313      {
00314         // This method was invented by me, so give me credit for it when you use it somewhere. Thank you.
00315         // Carlo Wood <carlo@alinoe.com> -- 4 December 2004.
00316         int tr = 0;
00317         if ((m & 1))
00318           tr = M_coefficients.template test<0>();
00319         if (((m - k) & 1))
00320           tr ^= M_coefficients.template test<m - k>();
00321         if (k1)
00322         {
00323           if (((m - k1) & 1))
00324             tr ^= M_coefficients.template test<m - k1>();
00325           if (((m - k2) & 1))
00326             tr ^= M_coefficients.template test<m - k2>();
00327         }
00328         return tr;
00329       }
00330 
00363       friend xor_type operator+ <>(polynomial const& p1, polynomial const& p2);
00364 
00373       friend xor_type operator- <>(polynomial const& p1, polynomial const& p2);
00374 
00378       friend polynomial operator* <>(polynomial const& p1, polynomial const& p2);
00379 #ifdef LIBECC_DOXYGEN
00380       // Only added for documentational reasons.
00386       friend bool operator*(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00392       friend bool operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00393 #endif
00394 
00398       friend polynomial operator/ <>(polynomial const& p1, polynomial const& p2);
00399 #ifdef LIBECC_DOXYGEN
00400       // Only added for documentational reasons.
00406       friend bool operator/(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00412       friend bool operator/(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00413 #endif
00414 
00418       friend bool operator== <>(polynomial const& p1, polynomial const& p2);
00419 #ifdef LIBECC_DOXYGEN
00420       // Only added for documentational reasons.
00428       friend bool operator==(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00436       friend bool operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00437 #endif
00438 
00442       friend bool operator!= <>(polynomial const& p1, polynomial const& p2);
00443 #ifdef LIBECC_DOXYGEN
00444       // Only added for documentational reasons.
00452       friend bool operator!=(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00460       friend bool operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00461 #endif
00462 
00468       friend std::ostream& operator<< <>(std::ostream& os, polynomial const& p);
00469 #ifdef LIBECC_DOXYGEN
00470       // Only added for documentational reasons.
00476       friend std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2>::xor_type const& expr);
00477 #endif
00478 
00482       bitset<m> const& get_bitset(void) const{ return M_coefficients; }
00483 
00487       bitset<m>& get_bitset(void) { return M_coefficients; }
00488 
00489     private:
00490       static void reduce(bitset_digit_t* buf);
00491       static bitset_digit_t reducea(bitset_digit_t* a);
00492       static void calculate_normal(void);
00493 
00494       void multiply_with(polynomial const& p1, bitset<m>& result) const;
00495 #if ECC_DEBUG
00496 #if LIBECC_AUGMENTED
00497       void print_matrix(bitset<2 * m> const* matrix, bitset<m> const& pivotted);
00498 #else
00499       void print_matrix(bitset<m> const* matrix, bitset<m> const& pivotted);
00500 #endif
00501 #endif
00502   };
00503 
00504 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00505   polynomial<m, k, k1, k2> const polynomial<m, k, k1, k2>::one(1);
00506 
00507 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00508   bool polynomial<m, k, k1, k2>::sqrt(void)
00509   {
00510     if (!k1)
00511     {
00512       bitset<m> highbits;
00513       highbits.reset();
00514 
00515       // First convert all odd powers into even powers
00516       if ((m & 1) == 1)
00517       {
00518         if ((k & 1) == 1)               // m and k are odd?
00519         {
00520           for(unsigned int bit = 1; bit < m; bit += 2)
00521           {
00522             if (M_coefficients.test(bit))
00523             {
00524               if (bit >= m - k)
00525                 highbits.flip(bit + k - m);
00526               else
00527                 M_coefficients.flip(bit + k);
00528               highbits.flip(bit);
00529             }
00530           }
00531         }
00532         else                    // m is odd and k is even
00533         {
00534           for(unsigned int bit = 1; bit < m; bit += 2)
00535           {
00536             if (M_coefficients.test(bit))
00537             {
00538               if (bit >= m - k)
00539               {
00540                 M_coefficients.flip(bit + 2 * k - m);
00541                 M_coefficients.flip(bit + k - m);
00542               }
00543               else
00544                 M_coefficients.flip(bit + k);
00545               highbits.flip(bit);
00546             }
00547           }
00548         }
00549       }
00550       else if ((k & 1) == 1)    // m is even and k is odd
00551       {
00552         for(unsigned int bit = 1; bit < m; bit += 2)
00553         {
00554           if (M_coefficients.test(bit))
00555           {
00556             if (bit < k)
00557             {
00558               M_coefficients.flip(bit + k);
00559               M_coefficients.flip(bit + m - k);
00560               highbits.flip(bit + m - k);
00561             }
00562             else
00563             {
00564               M_coefficients.flip(bit - k);
00565               highbits.flip(bit - k);
00566             }
00567           }
00568         }
00569       }
00570       else                      // m and k are both even (actually, this should never be used as reduction polynomial).
00571       {
00572         for(unsigned int bit = 1; bit < m; bit += 2)
00573           if (M_coefficients.test(bit))
00574             return false;               // This can't be a square
00575       }
00576 
00577       // Next handle the remaining even powers
00578       unsigned int bit_to = 1;
00579       for(unsigned int bit = 2; bit < m; bit += 2)
00580       {
00581         if (M_coefficients.test(bit))
00582           M_coefficients.set(bit_to);
00583         else
00584           M_coefficients.clear(bit_to);
00585         ++bit_to;
00586       }
00587       for(unsigned int bit = m % 2; bit < m; bit += 2)
00588       {
00589         if (highbits.test(bit))
00590           M_coefficients.set(bit_to);
00591         else
00592           M_coefficients.clear(bit_to);
00593         ++bit_to;
00594       }
00595     }
00596     else
00597     {
00598       structRoot {
00599         polynomial<m, k, k1, k2> value;
00600         Root(polynomial<m, k, k1, k2> const& p) : value(p)
00601         {
00602           bitset_digit_t p2buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00603           polynomial<m, k, k1, k2>& p2 = value.square(p2buf);
00604           bitset_digit_t p4buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00605           polynomial<m, k, k1, k2>& p4 = p2.square(p4buf);
00606           for (unsigned int i = 1; i < m / 2; ++i)
00607           {
00608             p4.square(p2buf);
00609             p2.square(p4buf);
00610           }
00611           value = (m % 2 == 0) ? p2 : p4;
00612         }
00613       };
00614       static Root const root_of_t(polynomial<m, k, k1, k2>(2));
00615       polynomial<m, k, k1, k2> tmp(0);
00616       bitset<m> tmp2;
00617       tmp2.reset();
00618       for(unsigned int bit = 0; bit < m / 2; ++bit)
00619       {
00620         if (M_coefficients.test(2 * bit))
00621           tmp2.set(bit);
00622         if (M_coefficients.test(2 * bit + 1))
00623           tmp.get_bitset().set(bit);
00624       }
00625       if (m % 2 == 1 && M_coefficients.test(m - 1))
00626         tmp2.set(m / 2);
00627       M_coefficients = tmp2;
00628       *this += tmp * root_of_t.value;
00629     }
00630     return true;
00631   }
00632 
00633 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00634   inline polynomial<m, k, k1, k2>&
00635   polynomial<m, k, k1, k2>::operator*=(polynomial const& p)
00636   {
00637     multiply_with(p, M_coefficients);
00638     return *this;
00639   }
00640 
00641 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00642   inline polynomial<m, k, k1, k2>&
00643   polynomial<m, k, k1, k2>::operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
00644   {
00645     return (*this *= polynomial<m, k, k1, k2>(expr));
00646   }
00647 
00648 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00649   inline polynomial<m, k, k1, k2>&
00650   polynomial<m, k, k1, k2>::operator=(xor_type const& expression)
00651   {
00652     M_coefficients = expression;
00653     return *this;
00654   }
00655 
00656 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00657   void
00658   polynomial<m, k, k1, k2>::multiply_with(polynomial const& p1, bitset<m>& result) const
00659  {
00660     bitset_digit_t output[bitset<m>::digits * 2] __attribute__ ((aligned (8)));
00661 
00662     // Find the first non-zero digit in the input polynomial of this object.
00663     unsigned int digit = 0;
00664     while(M_coefficients.digit(digit) == 0)             // Still zero?
00665     {
00666       output[digit] = 0;                                // That means that the output will end on zero too.
00667       if (++digit == bitset<m>::digits)
00668       {
00669         result.reset();                                 // The whole polynomial is zero, the result will be zero too.
00670         return;
00671       }
00672     }
00673     unsigned int uninitialized_digit = digit;           // The next digit of `output' that has not yet been initialized.
00674     // Find the first digit in the input polynomial of this object whose first bit is set.
00675     for(; digit < bitset<m>::digits; ++digit)
00676     {
00677       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00678       {
00679         // Set the output to p1 times this bit.
00680         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00681           output[d + digit] = p1.get_bitset().digit(d);
00682         uninitialized_digit = bitset<m>::digits + digit;
00683         ++digit;                                        // Set to the next input digit.
00684         break;
00685       }
00686       output[digit] = 0;                                // Initialize this digit of the output to 0.
00687       ++uninitialized_digit;
00688     }
00689     // Set the remaining digits to zero, if any.
00690     for(unsigned int remaining_digit = uninitialized_digit; remaining_digit < sizeof(output) / sizeof(bitset_digit_t); ++remaining_digit)
00691       output[remaining_digit] = 0;
00692     // Find for the remaining input digits the ones that have their first bit set.
00693     for(; digit < bitset<m>::digits; ++digit)
00694       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00695       {
00696         // Add p1 times this bit to the output.
00697         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00698           output[d + digit] ^= p1.get_bitset().digit(d);
00699       }
00700     // Create a bitset that will contain p1, shifted at most bitset_digit_bits - 1 to the left.
00701     bitset<m + bitset_digit_bits - 1> shifted_p1;
00702     // Start with having it shifted 1 bit to the left.
00703     bitset_digit_t carry = 0;
00704     unsigned int d = 0;
00705     for(bitset_digit_t const* ptr = p1.get_bitset().digits_ptr(); ptr < p1.get_bitset().digits_ptr() + bitset<m>::digits; ++ptr, ++d)
00706     {
00707       shifted_p1.rawdigit(d) = (*ptr << 1) | carry;
00708       carry = *ptr >> (8 * sizeof(bitset_digit_t) - 1);
00709     }
00710     if (d < bitset<m + bitset_digit_bits - 1>::digits)
00711       shifted_p1.rawdigit(d) = carry;
00712     for(bitset_digit_t bitmask = 2;;)
00713     {
00714       for(unsigned int digit = 0; digit < bitset<m>::digits; ++digit)
00715         if ((M_coefficients.digit(digit) & bitmask))
00716         {
00717           for (unsigned int d = 0; d < shifted_p1.digits; ++d)
00718             output[d + digit] ^= shifted_p1.digit(d);
00719         }
00720       bitmask <<= 1;            // Next bit.
00721       if (bitmask == 0)         // Done?
00722         break;
00723       // Shift p1 one bit further to the left.
00724       shifted_p1.template shift_op<1, left, assign>(shifted_p1);
00725     }
00726     // Reduce the resulting output of the multiplication.
00727     reduce(output);
00728     // Copy the reduced output to `result'.
00729     std::memcpy(result.digits_ptr(), output, bitset<m>::digits * sizeof(bitset_digit_t));
00730   }
00731 
00732 #if ECC_DEBUG
00733 template<unsigned int m>
00734 structdiv_tct {
00735   bitset_digit_t const* M_p;
00736   int M_deg;
00737   int M_low;
00738   div_tct(bitset_base<m> const& b, int deg, int low) : M_p(b.digits_ptr()), M_deg(deg), M_low(low) { }
00739   void print_on(std::ostream& os) const
00740  {
00741     int lowbit = (M_low >> bitset_digit_bits_log2) * bitset_digit_bits;
00742     if (lowbit > 0)
00743       lowbit = 0;
00744     for (int b = 2 * m - 1; b >= lowbit; --b)
00745     {
00746       if (b == M_deg)
00747         os << "\e[31m";
00748       int digitoffset = (b >> bitset_digit_bits_log2);
00749       bitset_digit_t mask = static_cast<bitset_digit_t>(1) << (b & (bitset_digit_bits - 1));
00750       if (M_p[digitoffset] & mask)
00751         os << '1';
00752       else
00753         os << '0';
00754       if (b == M_low)
00755         os << "\e[0m";
00756       if (b == 0)
00757         os << '.';
00758     }
00759   }
00760 };
00761 #endif
00762 
00763 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00764   polynomial<m, k, k1, k2>&
00765   polynomial<m, k, k1, k2>::operator/=(polynomial const& p)
00766   {
00767     //std::cout << "Entering polynomial<" << m << ", " << k << ", " << k1 << ", " << k2 << ">::operator/=()" << std::endl;
00768 #if ECC_DEBUG
00769     LibEccDout(dc::polynomial|noprefix_cf, "");
00770     //LibEccDout(dc::polynomial, "Entering polynomial<" << m << ", " << k << ", " << k1 << ", " << k2 << ">::operator/=()");
00771     polynomial<m, k, k1, k2> x(p.get_bitset());
00772     polynomial<m, k, k1, k2> y(M_coefficients);
00773     LibEccDout(dc::polynomial, "x(t) = " << x);
00774     LibEccDout(dc::polynomial|flush_cf, "y(t) = " << y);
00775 #endif
00776 
00777     // The following algorithm is based on the algorithm described on
00778     // page 7 of http://research.sun.com/techrep/2001/smli_tr-2001-95.ps
00779     // with significant optimization changes by Carlo Wood.
00780     // The basis of the algorithm is to keep an invariants valid:
00781     // A * y = U * x  and  B * y = V * x
00782     // while reducing the size of A and B steadily to 0, until either is 1.
00783     // The size of A and B is defined as the distance between the lowest
00784     // and highest 1 in the bitset.
00785 
00786     // Make sure that there is enough space for a full bitset object
00787     // and align the bitsets on a multiple of bitset_digit_t.
00788     static unsigned int const digit_offset_UV = ((sizeof(bitset<m>) * 8 - 1) / bitset_digit_bits + 1);
00789     static unsigned int const offset_UV = digit_offset_UV * bitset_digit_bits;
00790     // Make room for exponents from at least t^-m till t^2m.
00791     static unsigned int const digit_size_UV = 3 * digit_offset_UV;
00792     // Variables A and B do not need this much space.
00793     static unsigned int const digit_size_AB = bitset<m>::digits;
00794     // One digit of padding, needed for xor_with_zero_padded.
00795     static unsigned int const padding_digit_size = 1;
00796     // 'F' (Floating-point polynomial) will be shifted to the right and
00797     // is therefore defined to run from t^-2m till t^2m.  This means it will
00798     // be shifted OVER the other bitsets, but we don't need those anymore anyway.
00799     static unsigned int const offset_F = 2 * offset_UV;
00800     static unsigned int const size_F = 2 * m + offset_F;
00801 
00802     // Declare stack space for four variables.
00803     // This array is going to be used as follows
00804     // (where padding = padding_digit_size; AB = digit_size_AB and do_UV = digit_offset_UV):
00805     //
00806     //          A:->         B:->                U:---->                       V:---->
00807     // [padding][AB][padding][AB][padding][do_UV][do_UV][do_UV][padding][do_UV][do_UV][do_UV][padding]
00808     //                                    <-- digit_size_UV -->         <-- digit_size_UV -->
00809     //                             [do_UV]                       [do_UV]
00810     //                             F:------------------------>   F:------------------------>
00811     //                                           R:---->  *or*                 R:---->
00812     //                             <- offset_F ->                <- offset_F ->
00813     //
00814     // This positioning is done to make the total number of bits that we have to clear in
00815     // as small as possible. R is just an alias for either U or V depending on how the
00816     // algorithm finishes. Note F overlaps with R and they alias eachother, F might also
00817     // overlap with B but that variable isn't used anymore then.
00818     //
00819     // In order to avoid aliasing problems (see http://dbp-consulting.com/StrictAliasing.pdf),
00820     // we put the bitsets in a struct together with bitset_digit_t for the padding and
00821     // use a union to give F a place in all of this. Because objects with constructors
00822     // aren't allowed in union's we have to use bitset_base.
00823     structUV_bitpool_t {
00824       bitset_digit_t offset1[digit_offset_UV];
00825       union{
00826         bitset_digit_t offset2[digit_size_UV - digit_offset_UV];
00827         bitset_base<m> UV;
00828       };
00829     };
00830     structbitpool_t {
00831       bitset_digit_t padding1[padding_digit_size];
00832       bitset_base<m> A;
00833       bitset_digit_t padding2[padding_digit_size];
00834       bitset_base<m> B;
00835       bitset_digit_t padding3[padding_digit_size];
00836       UV_bitpool_t U_bitpool;
00837       bitset_digit_t padding4[padding_digit_size];
00838       UV_bitpool_t V_bitpool;
00839       bitset_digit_t padding5[padding_digit_size];
00840     };
00841     structFU_bitpool_t {
00842       bitset_digit_t padding[3 * padding_digit_size + 2 * digit_size_AB - digit_offset_UV];
00843       bitset_base<size_F> FU;
00844     };
00845     structFV_bitpool_t {
00846       bitset_digit_t padding[4 * padding_digit_size + 2 * digit_size_AB + 2 * digit_offset_UV];
00847       bitset_base<size_F> FV;
00848     };
00849     unionstrict_aliasing_OK_t {
00850       bitpool_t ABUV;
00851       FU_bitpool_t FU;
00852       FV_bitpool_t FV;
00853     };
00854     strict_aliasing_OK_t bitpool __attribute__ ((__aligned__ (ECC_BITS)));
00855     std::memset((char*)&bitpool, 0, sizeof(bitpool));
00856     bitset_base<m>& A(bitpool.ABUV.A);
00857     bitset_base<m>& B(bitpool.ABUV.B);
00858     bitset_base<m>& U(bitpool.ABUV.U_bitpool.UV);
00859     bitset_base<m>& V(bitpool.ABUV.V_bitpool.UV);
00860 
00861 #if ECC_DEBUG
00862     assert(sizeof(strict_aliasing_OK_t) == sizeof(bitpool_t));
00863     assert(sizeof(bitset_base<m>) == digit_size_AB * sizeof(bitset_digit_t));
00864     assert(sizeof(UV_bitpool_t) == digit_size_UV * sizeof(bitset_digit_t));
00865     assert(sizeof(bitpool_t) == (5 * padding_digit_size + 2 * digit_size_AB + 2 * digit_size_UV) * sizeof(bitset_digit_t));
00866     assert((bitset_digit_t*)&bitpool.FU.FU + 2 * digit_offset_UV == (bitset_digit_t*)&U);
00867     assert((bitset_digit_t*)&bitpool.FV.FV + 2 * digit_offset_UV == (bitset_digit_t*)&V);
00868 #endif
00869 
00870     // The representation of U and V will be done with bitsets of size `digit_size_UV * bitset_digit_bits'.
00871     // This means that they contain powers of t with a negative exponent.
00872     // That is not a problem as those are well defined: t^(-n) = 1 / t^n.
00873 
00874     // Let rp = M(t) = t^m + t^k [+ t^k1 + t^k2] + 1.
00875 #if ECC_DEBUG
00876     bitset<m + 1> rp("1");
00877     rp.template set<m>();
00878     rp.template set<k>();
00879     if (k1)
00880     {
00881       rp.template set<k1>();
00882       rp.template set<k2>();
00883     }
00884 #endif
00885 
00886     // Let U(t) = y(t) (= M_coefficients).
00887     LibEccDout(dc::polynomial|flush_cf, "U <- y");
00888     U = M_coefficients;
00889 
00890     // Guess the maximum and minimum powers to be the possible limits.
00891     int degU = m - 1;
00892     int lowU = 0;
00893 
00894     // Let A(t) = x(t).
00895     LibEccDout(dc::polynomial|flush_cf, "A <- x");
00896     A = p.get_bitset();
00897 
00898     // Then
00899     //
00900     // A(t) * y(t) = U(t) * x(t)  [mod M(t)].
00901 
00902     // Let V(t) = 0
00903     // Let B = M(t)
00904     //
00905     // Then
00906     //
00907     // B(t) * y(t) = V(t) * x(t)  [mod M(t)].
00908     //
00909     // Let degA be the highest power of t in A.
00910     typename bitset<m>::const_reverse_iterator degA = A.rbegin();
00911     degA.find1();
00912     LibEccDout(dc::polynomial|flush_cf, "deg(A) == " << degA);
00913 
00914     // Let lowA be the lowest power of t in A.
00915     typename bitset<m>::const_iterator lowA = A.begin();
00916     lowA.find1();
00917     LibEccDout(dc::polynomial|flush_cf, "low(A) == " << lowA);
00918 
00919     unsigned int sizeA = degA.get_index() - lowA.get_index();
00920 
00921     // Let n = m - deg(A).
00922     unsigned int n = m - degA.get_index();
00923     //
00924     // Then B'(t) = B(t) - A(t) * t^n will have a degree less than m.
00925     // And
00926     //
00927     // B'(t) * y(t) = B(t) * y(t) - A(t) * y(t) * t^n =
00928     //              = V(t) * x(t) - U(t) * x(t) * t^n =
00929     //              = (V(t) - U(t) * t^n) * x(t) =
00930     //              = V'(t) * x(t)                      [mod M(t)].
00931     //
00932     // B <- B'
00933     LibEccDout(dc::polynomial|flush_cf, "B <- A * t^" << n << " + " << cwprint_using(rp, &bitset<m+1>::base2_print_on));
00934     B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), n);
00935     B.template flip<m>();
00936     B.template flip<k>();
00937     if (k1)
00938     {
00939       B.template flip<k1>();
00940       B.template flip<k2>();
00941     }
00942     B.template flip<0>();
00943 
00944     // Let degB be the highest power of t in B.
00945     typename bitset<m>::const_reverse_iterator degB = B.rbegin();
00946     degB.find1();
00947     LibEccDout(dc::polynomial|flush_cf, "deg(B) == " << degB);
00948 
00949     // Let lowB be the lowest power of t in B.
00950     typename bitset<m>::const_iterator lowB = B.begin();
00951     lowB.find1();
00952     LibEccDout(dc::polynomial|flush_cf, "low(B) == " << lowB);
00953 
00954     // V <- V'
00955     LibEccDout(dc::polynomial|flush_cf, "V <- U * t^" << n <<
00956         "  [mod " << cwprint_using(rp, &bitset<m + 1>::base2_print_on) << "]");
00957     V.xor_with_zero_padded(U, 0, m - 1, n);
00958 
00959     int degV = degU + n;
00960     int lowV = lowU + n;
00961     
00962     unsigned int sizeB = degB.get_index() - lowB.get_index();
00963 
00964     if (sizeA > 0 && sizeB > 0)
00965       for(;;)
00966       {
00967         LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00968         LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00969         LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00970         LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00971         if (sizeA < sizeB)
00972         {
00973           int left_shift = lowB.get_index() - lowA.get_index();
00974           LibEccDout(dc::polynomial|flush_cf, "B <- B + A * t^" << left_shift);
00975           B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), left_shift);
00976           degB.find1();
00977           lowB.find1();
00978           sizeB = degB.get_index() - lowB.get_index();
00979           LibEccDout(dc::polynomial|flush_cf, "V <- V + U * t^" << left_shift);
00980           V.xor_with_zero_padded(U, lowU, degU, left_shift);
00981           degV = std::max(degV, degU + left_shift);
00982           lowV = std::min(lowV, lowU + left_shift);
00983           if (sizeB == 0)
00984             break;
00985         }
00986         else
00987         {
00988           int left_shift = lowA.get_index() - lowB.get_index();
00989           LibEccDout(dc::polynomial|flush_cf, "A <- A + B * t^" << left_shift);
00990           A.xor_with_zero_padded(B, lowB.get_index(), degB.get_index(), left_shift);
00991           degA.find1();
00992           lowA.find1();
00993           sizeA = degA.get_index() - lowA.get_index();
00994           LibEccDout(dc::polynomial|flush_cf, "U <- U + V * t^" << left_shift);
00995           U.xor_with_zero_padded(V, lowV, degV, left_shift);
00996           degU = std::max(degU, degV + left_shift);
00997           lowU = std::min(lowU, lowV + left_shift);
00998           if (sizeA == 0)
00999             break;
01000         }
01001       }
01002 
01003     LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
01004     LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
01005     LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
01006     LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
01007 
01008     bitset_base<m>* R;
01009     bitset_base<size_F>* F;
01010     int low1, lowR;
01011 #if ECC_DEBUG
01012     int degR;
01013 #endif
01014     if (sizeA == 0)
01015     {
01016       LibEccDout(dc::polynomial|flush_cf, "R = U");
01017       R = &U;
01018       F = &bitpool.FU.FU;
01019       low1 = lowA.get_index();
01020       lowR = lowU;
01021 #if ECC_DEBUG
01022       degR = degU;
01023 #endif
01024     }
01025     else // sizeB == 0
01026     {
01027       LibEccDout(dc::polynomial|flush_cf, "R = V");
01028       R = &V;
01029       F = &bitpool.FV.FV;
01030       low1 = lowB.get_index();
01031       lowR = lowV;
01032 #if ECC_DEBUG
01033       degR = degV;
01034 #endif
01035     }
01036 
01037     *F >>= low1;
01038     lowR -= low1;
01039 #if ECC_DEBUG
01040     degR -= low1;
01041 #endif
01042     // Get rid of negative exponents.
01043     LibEccDout(dc::polynomial|flush_cf, "lowR = " << lowR);
01044     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01045     if ((!k1 && k >= bitset_digit_bits) || k2 >= bitset_digit_bits)
01046     {
01047       static int const digit_shift_k2 = k2 >> bitset_digit_bits_log2;
01048       static int const bit_shift_k2 = k2 & (bitset_digit_bits  - 1);
01049       static int const digit_shift_k1 = k1 >> bitset_digit_bits_log2;
01050       static int const bit_shift_k1 = k1 & (bitset_digit_bits  - 1);
01051       static int const digit_shift_k = k >> bitset_digit_bits_log2;
01052       static int const bit_shift_k = k & (bitset_digit_bits  - 1);
01053       static int const digit_shift_m = m >> bitset_digit_bits_log2;
01054       static int const bit_shift_m = m & (bitset_digit_bits  - 1);
01055       static int const DS_minus_bit_shift_k2_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k2) & (bitset_digit_bits  - 1);
01056       static int const DS_minus_bit_shift_k1_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k1) & (bitset_digit_bits  - 1);
01057       static int const DS_minus_bit_shift_k_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k) & (bitset_digit_bits  - 1);
01058       static int const DS_minus_bit_shift_m_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_m) & (bitset_digit_bits  - 1);
01059       int first_digit = (lowR + offset_F) >> bitset_digit_bits_log2;
01060       bitset_digit_t* ptr = F->digits_ptr() + first_digit;
01061       bitset_digit_t* ptr1 = R->digits_ptr();
01062       while(ptr < ptr1)
01063       {
01064         if (k1)
01065         {
01066           ptr[digit_shift_k2] ^= (*ptr) << bit_shift_k2;
01067           if (bit_shift_k2 != 0)
01068             ptr[digit_shift_k2 + 1] ^= (*ptr) >> DS_minus_bit_shift_k2_with_compile_warning_evasion;
01069           ptr[digit_shift_k1] ^= (*ptr) << bit_shift_k1;
01070           if (bit_shift_k1 != 0)
01071             ptr[digit_shift_k1 + 1] ^= (*ptr) >> DS_minus_bit_shift_k1_with_compile_warning_evasion;
01072         }
01073         ptr[digit_shift_k] ^= (*ptr) << bit_shift_k;
01074         if (bit_shift_k != 0)
01075           ptr[digit_shift_k + 1] ^= (*ptr) >> DS_minus_bit_shift_k_with_compile_warning_evasion;
01076         ptr[digit_shift_m] ^= (*ptr) << bit_shift_m;
01077         if (bit_shift_m != 0)
01078           ptr[digit_shift_m + 1] ^= (*ptr) >> DS_minus_bit_shift_m_with_compile_warning_evasion;
01079         ++ptr;
01080       }
01081     }
01082     else
01083     {
01084       for (unsigned int i = lowR + offset_F; i < offset_F; ++i)
01085       {
01086         if (F->test(i))
01087         {
01088 #if ECC_DEBUG
01089           F->flip(i);           // This is not really needed, but prints nicer output below.
01090 #endif
01091           if (k1)
01092           {
01093             F->flip(i + k2);
01094             F->flip(i + k1);
01095           }
01096           F->flip(i + k);
01097           F->flip(i + m);
01098         }
01099       }
01100     }
01101 #if ECC_DEBUG
01102     lowR = 0;
01103     degR = 2 * m - 1;
01104 #endif
01105     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01106     reduce(R->digits_ptr());
01107 #if ECC_DEBUG
01108     degR = m - 1;
01109 #endif
01110     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01111     *static_cast<bitset_base<m>*>(&M_coefficients) = *R;
01112 
01113     //std::cout << "Leaving polynomial<" << m << ", " << k << ", " << k1 << ", " << k2 << ">::operator/=()" << std::endl;
01114     return *this;
01115   }
01116 
01117 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01118   inline polynomial<m, k, k1, k2>&
01119   polynomial<m, k, k1, k2>::operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
01120   {
01121     return (*this /= polynomial<m, k, k1, k2>(expr));
01122   }
01123 
01124 // Solve x^2 + b x = c.
01125 // Assuming that b != 0, there are 2 solutions: x1 and x1 + b.
01126 // This means that during the 'wiping' of the matrix in order
01127 // to solve x, one bit of x will stay undetermined.  We need
01128 // to take special care to make sure that this will be a bit
01129 // for which a bit of 'b' is set, otherwise we'd return a wrong
01130 // value.
01131 //
01132 // If b equals zero, then the solution is the sqrt(c).  Otherwise
01133 // we can devide both sides of the equation by b^2 and solve
01134 // y^2 + y = c/b^2, and set x = b * y.
01135 //
01136 // There will only be a solution to this equation iff 0 = Tr(c/b^2).
01137 // (simply square the equation m-1 times and add them all up).
01138 //
01139 // Note that if y1 is a solution, then so is y1 + 1, hence we
01140 // cannot determine the least significant bit of y.
01141 //
01142 // It is possible to compose a matrix A such that Ax = x^2 + x
01143 // because squaring is an automorphism of the field:
01144 // x is a sum of basis elements, ie x = b1 + b2 + b3 and
01145 // x^2 = b1^2 + b2^2 + b3^2.  Therefore, if there exists a
01146 // matrix S such that Sb_i = b_i^2 for any basis element then
01147 // A = (S + I).  Moreover, such a matrix S must exist because
01148 // there are exactly m basis elements, and a matrix of mxm
01149 // will always be able to satisfy that.
01150 
01151 #if ECC_DEBUG
01152 // Debug function.
01153 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01154   void polynomial<m, k, k1, k2>::print_matrix(
01155 #if LIBECC_AUGMENTED
01156        bitset<2 * m> const* matrix,
01157 #else
01158        bitset<m> const* matrix,
01159 #endif
01160        bitset<m> const& pivotted)
01161   {
01162     // Print the matrix.
01163     for (unsigned int n = 1; n < m; n *= 10)
01164     {
01165       LibEccDout(dc::gaussj|continued_cf, "  ");
01166       for (unsigned int bit = 0; bit < matrix->number_of_bits; ++bit)
01167       {
01168         if (bit == m)
01169           LibEccDout(dc::continued, ' ');
01170         if ((bit % m) >= 1 && (bit % m) < (m + 1) / 2)
01171           LibEccDout(dc::continued, "+ ");
01172         else if (pivotted.test(bit % m))
01173           LibEccDout(dc::continued, (((bit % m) / n) % 10) << ' ');
01174         else
01175           LibEccDout(dc::continued, "  ");
01176       }
01177       LibEccDout(dc::finish, "");
01178     }
01179     for (unsigned int row = 0; row < m; ++row)
01180     {
01181       std::string line;
01182       if (row >= 1 && row < (m + 1) / 2)
01183         line = "+ ";
01184       else if (pivotted.test(row))
01185         line = "* ";
01186       else
01187         line = "  ";
01188       for (unsigned int bit = 0; bit < matrix->number_of_bits; ++bit)
01189       {
01190         if (bit == m)
01191           line += ' ';
01192         bool isset = matrix[row].test(bit);
01193         bool need_color = LIBECC_INPLACE && (matrix->number_of_bits > m) &&
01194             (((bit % m) >= 1 && (bit % m) < (m + 1) / 2) || pivotted.test(bit % m));
01195         if (need_color)
01196         {
01197           unsigned int corresponding_bit = (bit + m) % (2 * m);
01198           if (isset == matrix[row].test(corresponding_bit))
01199             line += "\e[32m";
01200           else
01201             line += "\e[31m";
01202         }
01203         line += (isset ? '1' : '0');
01204         if (need_color)
01205           line += "\e[0m";
01206         line += ' ';
01207       }
01208       LibEccDout(dc::gaussj, line);
01209     }
01210     LibEccDout(dc::gaussj|noprefix_cf, "");
01211   }
01212 #endif
01213 
01214 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01215   polynomial<m, k, k1, k2>::polynomial(polynomial<m, k, k1, k2> const& b, polynomial<m, k, k1, k2> const& c) :
01216       M_coefficients(0)
01217   {
01218     // If b == 0, then x = sqrt(c).
01219     if (!b.M_coefficients.any())
01220     {
01221       M_coefficients = c.M_coefficients;
01222       sqrt();
01223       return;
01224     }
01225 
01226     // Calculate c/b^2.
01227     bitset_digit_t b2buf[square_digits];
01228     polynomial<m, k, k1, k2>& b2 = b.square(b2buf);
01229     polynomial<m, k, k1, k2> cdb2(c);
01230     cdb2 /= b2;
01231     if (cdb2.trace() == 1)
01232       throw std::domain_error("x^2 + bx = c has no solution");
01233 
01234 #if LIBECC_AUGMENTED
01235     typedef bitset<2 * m> matrixrow_type;
01236 #else
01237     typedef bitset<m> matrixrow_type;
01238 #endif
01239     static matrixrow_type matrix[m];            // A mx2m or mxm matrix.
01240     static bool matrix_initialized;
01241     if (!matrix_initialized)
01242     {
01243       std::memset(matrix, 0, sizeof(matrix));
01244       // Fill this matrix with either the augmented matrix (A|I) or with just A,
01245       // where A is the matrix such that Ax = x^2 + x.
01246       for (unsigned int bit = 0; bit < m; ++bit)
01247       {
01248         matrix[bit].set(bit);           // The I of A = (S + I).
01249 #if LIBECC_AUGMENTED
01250         matrix[bit].set(bit + m);               // The I of (A|I).
01251 #endif
01252       }
01253       for (unsigned int bit = 0; bit < (m + 1) / 2; ++bit)
01254         matrix[2 * bit].flip(bit);      // The square of low exponents.
01255       for (unsigned int bit = (m + 1) / 2; bit < m; ++bit)
01256         matrix[2 * bit - m].set(bit);   // Reduction with m.
01257       for (unsigned int bit = (m + 1) / 2; bit < m - k / 2; ++bit)
01258         matrix[2 * bit - m + k].flip(bit);      // Reduction with m - k.
01259       if (k1)
01260       {
01261         for (unsigned int bit = (m + 1) / 2; bit < m - k1 / 2; ++bit)
01262           matrix[2 * bit - m + k1].flip(bit);   // Reduction with m - k1.
01263         for (unsigned int bit = (m + 1) / 2; bit < m - k2 / 2; ++bit)
01264           matrix[2 * bit - m + k2].flip(bit);   // Reduction with m - k2.
01265       }
01266       for (unsigned int bit = m - k / 2; bit < m; ++bit)
01267       {
01268         matrix[2 * bit - m + k - m].flip(bit);
01269         matrix[2 * bit - m + k - m + k].flip(bit);
01270         if (k1)
01271         {
01272           matrix[2 * bit - m + k - m + k1].flip(bit);
01273           matrix[2 * bit - m + k - m + k2].flip(bit);
01274         }
01275       }
01276       if (k1)
01277       {
01278         for (unsigned int bit = m - k1 / 2; bit < m; ++bit)
01279         {
01280           matrix[2 * bit - m + k1 - m].flip(bit);
01281           matrix[2 * bit - m + k1 - m + k].flip(bit);
01282           matrix[2 * bit - m + k1 - m + k1].flip(bit);
01283           matrix[2 * bit - m + k1 - m + k2].flip(bit);
01284         }
01285         for (unsigned int bit = m - k2 / 2; bit < m; ++bit)
01286         {
01287           matrix[2 * bit - m + k2 - m].flip(bit);
01288           matrix[2 * bit - m + k2 - m + k].flip(bit);
01289           matrix[2 * bit - m + k2 - m + k1].flip(bit);
01290           matrix[2 * bit - m + k2 - m + k2].flip(bit);
01291         }
01292       }
01293 
01294       bitset<m> pivotted;
01295       pivotted.reset();
01296 
01297       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01298 
01299       // Next, wipe it, so that the left half becomes I.
01300       // The first half is easy.
01301       for (unsigned int wipecol = 1; wipecol < (m + 1) / 2; ++wipecol)
01302       {
01303         matrix[2 * wipecol] ^= matrix[wipecol];
01304 #if LIBECC_INPLACE
01305         matrix[2 * wipecol].set(wipecol);               // Store the inverse in-place, destroying the original.
01306 #endif
01307       }
01308 
01309       // The second half is not.  Use Gauss-Jordan here.
01310       // Note that pivotting is hardly necessary because our arithmetic is infinitely accurate,
01311       // but we still need to find a '1' when we encounter a '0' on the main diagonal of course.
01312       // There will always be at least one '1' in every column, so that partial pivotting suffices
01313       // (speeding up things obviously), with the exception of the case where that '1' is only
01314       // found in row 0 (which is our 'singular' row and needs some special attention).
01315       // The row swapping is done because it is needed if we want to do our work "in-place",
01316       // reducing the amount of memory needed with a factor of two.
01317 
01318       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01319 
01320       unsigned int rowswaps[m];
01321       rowswaps[0] = 0;
01322       unsigned int colswaps[m], colswaps_inverse[m];
01323       for (unsigned int row = 0; row < m; ++row)
01324       {
01325         colswaps[row] = row;
01326         colswaps_inverse[row] = row;
01327       }
01328 
01329       // Run over all remaining columns and wipe them, immedeately replacing them with the result
01330       // since once a column is wiped we don't need its contents anymore.  Moreover, while wiping
01331       // the column it is optionally swapped with another column at the same time.  This, of course,
01332       // is only done to make the code not understandable anymore for you.
01333 #if LIBECC_SWAPCOLUMNS
01334       for (unsigned int colcnt = (m + 1) / 2; colcnt < m; ++colcnt)
01335 #else
01336       for (unsigned int wipecol = (m + 1) / 2; wipecol < m; ++wipecol)
01337 #endif
01338       {
01339 #if LIBECC_SWAPCOLUMNS
01340         // Find the next row that wasn't already wiped.
01341         unsigned int wipecol = colswaps[colcnt];
01342 #if ECC_DEBUG
01343         LibEccDout(dc::gaussj, "colcnt = " << colcnt);
01344         for (unsigned int row = 0; row < m; ++row)
01345         {
01346           LibEccDout(dc::gaussj, "colswaps[" << row << "] = " << colswaps[row] << "\t\tcolswaps_inverse[" << row << "] = " << colswaps_inverse[row]);
01347           assert(colswaps[colswaps_inverse[row]] == row);
01348           assert(colswaps_inverse[colswaps[row]] == row);
01349         }
01350         LibEccDout(dc::polynomial|noprefix_cf, "");
01351 #endif
01352 #endif
01353 
01354         // First find a suitable row to wipe with.
01355         // This searching is called 'pivotting'.
01356         LibEccDout(dc::gaussj, "Searching for suitable row to wipe with in column " << wipecol);
01357         unsigned int pivotrow;
01358         if (!matrix[wipecol].test(wipecol) || pivotted.test(wipecol))
01359         {
01360           for (pivotrow = wipecol;;)
01361           {
01362             if (++pivotrow == m)
01363             {
01364               if (matrix[0].test(wipecol) && !pivotted.template test<0>())
01365                 pivotrow = 0;
01366               else
01367               {
01368                 for (pivotrow = (m + 1) / 2; pivotrow < wipecol; ++pivotrow)
01369                   if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01370                     break;
01371               }
01372               if (pivotrow == wipecol)
01373               {
01374                 // This happens when we swapped with column 0 (which is all zeroes), for example when m == 14.
01375                 // Just ignore this column.
01376                 pivotrow = m;                   // Flag that we need to continue the main loop.
01377                 pivotted.set(wipecol);
01378                 matrix[wipecol].set(wipecol);   // Copy identity matrix over.
01379                 break;
01380               }
01381             }
01382             if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01383               break;
01384           }
01385           if (pivotrow == m)
01386             continue;
01387         }
01388         else
01389           pivotrow = wipecol;
01390         LibEccDout(dc::gaussj, "Using row " << pivotrow << " to wipe column " << wipecol);
01391         LibEccDout(dc::gaussj, "Before:");
01392         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01393         pivotted.set(pivotrow);
01394 #if LIBECC_SWAPCOLUMNS
01395         rowswaps[colcnt] = pivotrow;
01396         LibEccDout(dc::gaussj, "Setting rowswaps[" << colcnt << "] to " << pivotrow);
01397 #else
01398         rowswaps[wipecol] = pivotrow;                   // We temporarily use row 'pivotrow' to store row 'wipecol'.
01399         LibEccDout(dc::gaussj, "Setting rowswaps[" << wipecol << "] to " << pivotrow);
01400 #endif
01401         if (pivotrow == wipecol)
01402         {
01403 #if LIBECC_INPLACE
01404           matrix[pivotrow].set(wipecol);                // Store the inverse in-place, destroying the original.
01405 #endif
01406           for (unsigned int row = 0; row < m; ++row)
01407           {
01408             if (row == pivotrow)
01409               continue;
01410             if (matrix[row].test(wipecol))
01411             {
01412 #if LIBECC_INPLACE
01413               matrix[row].clear(wipecol);               // Store the inverse in-place, destroying the original.
01414 #endif
01415               matrix[row] ^= matrix[pivotrow]; 
01416             }
01417           }
01418         }
01419         else
01420         {
01421           // This block contains the main magic.  It's hard to understand I am afraid.
01422           // Basically this does the same as the code block above, but at the same time
01423           // swaps the columns 'wipecol' and 'pivotrow'.
01424 
01425 #if LIBECC_SWAPCOLUMNS
01426           // Swap pivot row bits, and set the bit in pivotrow (thats the identity matrix bit).
01427           if (matrix[pivotrow].test(pivotrow) != matrix[pivotrow].test(wipecol))
01428           {
01429             matrix[pivotrow].flip(wipecol);
01430 #if !LIBECC_INPLACE
01431             matrix[pivotrow].flip(pivotrow);    // No need to flip the 'pivotrow' column when we set it in the next line.
01432 #endif
01433           }
01434 #endif
01435 #if LIBECC_INPLACE
01436           matrix[pivotrow].set(pivotrow);               // Store the inverse in-place, destroying the original.
01437 #endif
01438           for (unsigned int row = 0; row < m; ++row)
01439           {
01440             if (row == pivotrow)                        // Don't wipe the row that we use to wipe.
01441               continue;
01442             matrixrow_type& mrow = matrix[row];
01443             if (mrow.test(wipecol))
01444             {
01445 #if LIBECC_SWAPCOLUMNS
01446               if (!mrow.test(pivotrow))         // If the value in the two columns differ,
01447               {
01448                 mrow.clear(wipecol);            // swap the two values.
01449 #if !LIBECC_INPLACE
01450                 mrow.set(pivotrow);             // No need to set pivotrow when it is overwritten in the next line.
01451 #endif
01452               }
01453 #endif
01454 #if LIBECC_INPLACE
01455               mrow.clear(pivotrow);             // Store the inverse in-place, destroying the original.
01456                                                 // This represents a 0 from the identity matrix.
01457 #endif
01458               mrow ^= matrix[pivotrow];         // Ok, now the columns have been swapped and to-be-wiped column
01459                                                 // has been replaced with a clean identity matrix bit. Perform
01460                                                 // the actual wiping.
01461             }
01462 #if LIBECC_SWAPCOLUMNS
01463             else if (mrow.test(pivotrow))       // Are the pivotrow and wipecol different?
01464             {
01465               mrow.set(wipecol);                // Then flip both, exchanging them effectively. If they
01466               mrow.clear(pivotrow);             //   were the same, consider them exchanged anyway.
01467             }
01468 #endif
01469           }
01470 #if LIBECC_SWAPCOLUMNS
01471           LibEccDout(dc::gaussj, "Also swapped columns " << pivotrow << " and " << wipecol);
01472           // Keep colswaps up to date.  We need colswaps_inverse to do that, therefore
01473           // we need to keep colswaps_inverse up to date too.
01474           std::swap(colswaps[colswaps_inverse[wipecol]], colswaps[colswaps_inverse[pivotrow]]);
01475           std::swap(colswaps_inverse[wipecol], colswaps_inverse[pivotrow]);
01476 #endif
01477         }
01478         LibEccDout(dc::gaussj, "After:");
01479         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01480       }
01481 
01482 #if ECC_DEBUG
01483       for (unsigned int i = 0; i < m; ++i)
01484       {
01485         if (rowswaps[i] != i)
01486           LibEccDout(dc::gaussj, i << " : " << rowswaps[i]);
01487         // Skip the first half of the matrix.
01488         if (i == 0)
01489           i = (m + 1) / 2 - 1;
01490       }
01491       LibEccDout(dc::gaussj|noprefix_cf, "");
01492 #endif
01493 
01494       if (pivotted.test(0))
01495       {
01496         int row0 = (m + 1) / 2;
01497         while (pivotted.test(row0))
01498           ++row0;
01499         rowswaps[0] = row0;
01500         pivotted.set(row0);
01501       }
01502 
01503       // Next perform some row rotations, in order to get all rows on their correct places again.
01504       for (unsigned int i = 0; i < m; ++i)
01505       {
01506         if (rowswaps[i] != i)
01507         {
01508           unsigned int j = i;
01509           bitset<2 * m> temp = matrix[j];
01510           LibEccDout(dc::gaussj|continued_cf, j);
01511           do
01512           {
01513             matrix[j] = matrix[rowswaps[j]];
01514             LibEccDout(dc::continued, " <-- " << rowswaps[j]);
01515             j = rowswaps[j];
01516           }
01517           while (rowswaps[j] != i);
01518           matrix[j] = temp;
01519           LibEccDout(dc::finish, " <-- " << i);
01520           j = i;
01521           do
01522           {
01523             int pj = j;
01524             j = rowswaps[pj];
01525             // Update the administration so that we won't try to rotate them again.
01526             rowswaps[pj] = pj;
01527           }
01528           while (j != i);
01529         }
01530         // Skip the first half of the matrix.
01531         if (i == 0)
01532           i = (m + 1) / 2 - 1;
01533       }
01534 
01535       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01536       matrix_initialized = true;
01537     }
01538 
01539     // Multiply the matrix with cdb2.
01540     for (unsigned int row = 0; row < m; ++row)
01541     {
01542 #if LIBECC_AUGMENTED
01543 #if LIBECC_INPLACE
01544       bitset<m> tmp = matrix[row];
01545 #else
01546       bitset<2 * m> tmp2;
01547       matrix[row].template shift_op<m, right, assign>(tmp2);
01548       bitset<m> tmp = tmp2;
01549 #endif
01550       tmp &= cdb2.get_bitset();
01551 #else
01552       bitset<m> tmp = matrix[row] & cdb2.get_bitset();
01553 #endif
01554       if (tmp.odd())
01555         M_coefficients.set(row);
01556     }
01557 
01558     // Finally, multiply with b to get x.
01559     *this *= b;
01560   }
01561 
01562 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01563   inline bool
01564   operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01565   {
01566     return p1.M_coefficients == p2.M_coefficients;
01567   }
01568 
01569 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01570   inline bool
01571   operator==(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01572   {
01573     return polynomial<m, k, k1, k2>(expr) == p2;
01574   }
01575 
01576 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01577   inline bool
01578   operator==(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01579   {
01580     return p1 == polynomial<m, k, k1, k2>(expr);
01581   }
01582 
01583 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01584   inline bool
01585   operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01586   {
01587     return p1.M_coefficients != p2.M_coefficients;
01588   }
01589 
01590 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01591   inline bool
01592   operator!=(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01593   {
01594     return polynomial<m, k, k1, k2>(expr) != p2;
01595   }
01596 
01597 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01598   inline bool
01599   operator!=(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01600   {
01601     return p1 != polynomial<m, k, k1, k2>(expr);
01602   }
01603 
01604 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01605   inline typename polynomial<m, k, k1, k2>::xor_type
01606   operator+(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01607   {
01608     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01609   }
01610 
01611 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01612   inline typename polynomial<m, k, k1, k2>::xor_type
01613   operator-(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01614   {
01615     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01616   }
01617 
01618 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01619   inline polynomial<m, k, k1, k2>
01620   operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01621   {
01622     polynomial<m, k, k1, k2> result;
01623     p1.multiply_with(p2, result.M_coefficients);
01624     return result;
01625   }
01626 
01627 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01628   inline polynomial<m, k, k1, k2>
01629   operator*(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01630   {
01631     return polynomial<m, k, k1, k2>(expr) * p2;
01632   }
01633 
01634 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01635   inline polynomial<m, k, k1, k2>
01636   operator*(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01637   {
01638     return p1 * polynomial<m, k, k1, k2>(expr);
01639   }
01640 
01641 
01642 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01643   inline polynomial<m, k, k1, k2>
01644   operator/(polynomial<m, k, k1, k2> const& e1, polynomial<m, k, k1, k2> const& e2)
01645   {
01646     polynomial<m, k, k1, k2> tmp(e1);
01647     tmp /= e2;
01648     return tmp;
01649   }
01650 
01651 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01652   inline polynomial<m, k, k1, k2>
01653   operator/(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01654   {
01655     return polynomial<m, k, k1, k2>(expr) / p2;
01656   }
01657 
01658 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01659   inline polynomial<m, k, k1, k2>
01660   operator/(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01661   {
01662     return p1 / polynomial<m, k, k1, k2>(expr);
01663   }
01664 
01665 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01666   std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2> const& p)
01667   {
01668     p.M_coefficients.base2_print_on(os);
01669     return os;
01670   }
01671 
01672 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01673   std::ostream& operator<<(std::ostream& os, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01674   {
01675     polynomial<m, k, k1, k2> p(expr);
01676     p.M_coefficients.base2_print_on(os);
01677     return os;
01678   }
01679 
01680 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01681   bool polynomial<m, k, k1, k2>::S_normal_initialized;
01682 
01683 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01684   bitset<m> polynomial<m, k, k1, k2>::S_normal;
01685  
01686 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01687   void polynomial<m, k, k1, k2>::calculate_normal(void)
01688   {
01689 #if 0
01690     bitset<m> single_bit(1);
01691     polynomial trace;
01692     bitset_digit_t nextfrob1_buf[square_digits];
01693     bitset_digit_t nextfrob2_buf[square_digits];
01694     polynomial* nextfrob1;
01695     polynomial* nextfrob2;
01696     for (int bit = 0; bit < m; ++bit)
01697     {
01698       trace = single_bit;
01699       nextfrob1 = &trace.square(nextfrob1_buf);
01700       for (int i = 0; i < (m - 1) / 2; ++i)
01701       {
01702         nextfrob2 = &nextfrob1->square(nextfrob2_buf);
01703         trace += *nextfrob1 + *nextfrob2;
01704         if ((m & 1) && i == (m - 3) / 2)
01705           break;
01706         nextfrob1 = &nextfrob2->square(nextfrob1_buf);
01707       }
01708       if (!(m & 1))
01709         trace += *nextfrob1;
01710       if (trace.get_bitset().template test<0>())
01711         S_normal.set(bit);
01712       single_bit.template shift_op<1, libecc::left, libecc::assign>(single_bit);
01713     }
01714 #else
01715     // We can do that faster... I didn't prove this yet, but it works.
01716     if ((m & 1))
01717       S_normal.template set<0>();
01718     if (((m - k) & 1))
01719       S_normal.template set<m - k>();
01720     if (k1)
01721     {
01722       if (((m - k1) & 1))
01723         S_normal.template set<m - k1>();
01724       if (((m - k2) & 1))
01725         S_normal.template set<m - k2>();
01726     }
01727 #endif
01728     S_normal_initialized = true;
01729   }
01730 
01731 } // namespace libecc
01732 
01733 #include <libecc/square.hcc>    // File with different copyright.
01734 
01735 #endif // LIBECC_POLYNOMIAL_H
Copyright © 2002-2008 Carlo Wood.  All rights reserved.