Skip to content

Commit fd181cc

Browse files
Tom St Denis authored and sjaeckel committed
added libtommath-0.17
1 parent 14161e8 commit fd181cc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+14772
-6950
lines changed

bn.pdf

1.35 KB
Binary file not shown.

bn.tex

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
\documentclass[]{report}
1+
\documentclass[]{article}
22
\begin{document}
33

4-
\title{LibTomMath v0.16 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
4+
\title{LibTomMath v0.17 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
55
\author{Tom St Denis \\ tomstdenis@iahu.ca}
66
\maketitle
77
\newpage

bn_fast_mp_invmod.c

+22-51
Original file line numberDiff line numberDiff line change
@@ -27,41 +27,18 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
2727
int res, neg;
2828

2929
/* init all our temps */
30-
if ((res = mp_init (&x)) != MP_OKAY) {
31-
goto __ERR;
32-
}
33-
34-
if ((res = mp_init (&y)) != MP_OKAY) {
35-
goto __X;
36-
}
37-
38-
if ((res = mp_init (&u)) != MP_OKAY) {
39-
goto __Y;
40-
}
41-
42-
if ((res = mp_init (&v)) != MP_OKAY) {
43-
goto __U;
44-
}
45-
46-
if ((res = mp_init (&B)) != MP_OKAY) {
47-
goto __V;
48-
}
49-
50-
if ((res = mp_init (&D)) != MP_OKAY) {
51-
goto __B;
30+
if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
31+
return res;
5232
}
5333

5434
/* x == modulus, y == value to invert */
5535
if ((res = mp_copy (b, &x)) != MP_OKAY) {
56-
goto __D;
57-
}
58-
if ((res = mp_copy (a, &y)) != MP_OKAY) {
59-
goto __D;
36+
goto __ERR;
6037
}
6138

62-
/* we need |y| */
63-
if ((res = mp_abs (&y, &y)) != MP_OKAY) {
64-
goto __D;
39+
/* we need y = |a| */
40+
if ((res = mp_abs (a, &y)) != MP_OKAY) {
41+
goto __ERR;
6542
}
6643

6744
/* 2. [modified] if x,y are both even then return an error!
@@ -70,15 +47,15 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
7047
*/
7148
if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
7249
res = MP_VAL;
73-
goto __D;
50+
goto __ERR;
7451
}
7552

7653
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
7754
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
78-
goto __D;
55+
goto __ERR;
7956
}
8057
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
81-
goto __D;
58+
goto __ERR;
8259
}
8360
mp_set (&D, 1);
8461

@@ -87,57 +64,57 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
8764
while (mp_iseven (&u) == 1) {
8865
/* 4.1 u = u/2 */
8966
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
90-
goto __D;
67+
goto __ERR;
9168
}
9269
/* 4.2 if A or B is odd then */
9370
if (mp_iseven (&B) == 0) {
9471
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
95-
goto __D;
72+
goto __ERR;
9673
}
9774
}
9875
/* B = B/2 */
9976
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
100-
goto __D;
77+
goto __ERR;
10178
}
10279
}
10380

10481
/* 5. while v is even do */
10582
while (mp_iseven (&v) == 1) {
10683
/* 5.1 v = v/2 */
10784
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
108-
goto __D;
85+
goto __ERR;
10986
}
11087
/* 5.2 if D is odd then */
11188
if (mp_iseven (&D) == 0) {
11289
/* D = (D-x)/2 */
11390
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
114-
goto __D;
91+
goto __ERR;
11592
}
11693
}
11794
/* D = D/2 */
11895
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
119-
goto __D;
96+
goto __ERR;
12097
}
12198
}
12299

123100
/* 6. if u >= v then */
124101
if (mp_cmp (&u, &v) != MP_LT) {
125102
/* u = u - v, B = B - D */
126103
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
127-
goto __D;
104+
goto __ERR;
128105
}
129106

130107
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
131-
goto __D;
108+
goto __ERR;
132109
}
133110
} else {
134111
/* v = v - u, D = D - B */
135112
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
136-
goto __D;
113+
goto __ERR;
137114
}
138115

139116
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
140-
goto __D;
117+
goto __ERR;
141118
}
142119
}
143120

@@ -151,26 +128,20 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
151128
/* if v != 1 then there is no inverse */
152129
if (mp_cmp_d (&v, 1) != MP_EQ) {
153130
res = MP_VAL;
154-
goto __D;
131+
goto __ERR;
155132
}
156133

157134
/* b is now the inverse */
158135
neg = a->sign;
159136
while (D.sign == MP_NEG) {
160137
if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
161-
goto __D;
138+
goto __ERR;
162139
}
163140
}
164141
mp_exch (&D, c);
165142
c->sign = neg;
166143
res = MP_OKAY;
167144

168-
__D:mp_clear (&D);
169-
__B:mp_clear (&B);
170-
__V:mp_clear (&v);
171-
__U:mp_clear (&u);
172-
__Y:mp_clear (&y);
173-
__X:mp_clear (&x);
174-
__ERR:
145+
__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
175146
return res;
176147
}

bn_fast_mp_montgomery_reduce.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ int
2626
fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
2727
{
2828
int ix, res, olduse;
29-
mp_word W[512];
29+
mp_word W[MP_WARRAY];
3030

3131
/* get old used count */
3232
olduse = a->used;
@@ -92,7 +92,7 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
9292

9393
/* inner loop */
9494
for (iy = 0; iy < m->used; iy++) {
95-
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
95+
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
9696
}
9797
}
9898

bn_fast_s_mp_mul_digs.c

+20-18
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,16 @@
1616

1717
/* Fast (comba) multiplier
1818
*
19-
* This is the fast column-array [comba] multiplier. It is designed to compute
20-
* the columns of the product first then handle the carries afterwards. This
21-
* has the effect of making the nested loops that compute the columns very
19+
* This is the fast column-array [comba] multiplier. It is
20+
* designed to compute the columns of the product first
21+
* then handle the carries afterwards. This has the effect
22+
* of making the nested loops that compute the columns very
2223
* simple and schedulable on super-scalar processors.
2324
*
24-
* This has been modified to produce a variable number of digits of output so
25-
* if say only a half-product is required you don't have to compute the upper half
26-
* (a feature required for fast Barrett reduction).
25+
* This has been modified to produce a variable number of
26+
* digits of output so if say only a half-product is required
27+
* you don't have to compute the upper half (a feature
28+
* required for fast Barrett reduction).
2729
*
2830
* Based on Algorithm 14.12 on pp.595 of HAC.
2931
*
@@ -32,7 +34,7 @@ int
3234
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3335
{
3436
int olduse, res, pa, ix;
35-
mp_word W[512];
37+
mp_word W[MP_WARRAY];
3638

3739
/* grow the destination as required */
3840
if (c->alloc < digs) {
@@ -47,10 +49,9 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
4749
/* calculate the columns */
4850
pa = a->used;
4951
for (ix = 0; ix < pa; ix++) {
50-
51-
/* this multiplier has been modified to allow you to control how many digits
52-
* of output are produced. So at most we want to make upto "digs" digits
53-
* of output.
52+
/* this multiplier has been modified to allow you to
53+
* control how many digits of output are produced.
54+
* So at most we want to make up to "digs" digits of output.
5455
*
5556
* this adds products to distinct columns (at ix+iy) of W
5657
* note that each step through the loop is not dependent on
@@ -73,14 +74,14 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
7374
*/
7475
_W = W + ix;
7576

76-
/* the number of digits is limited by their placement. E.g.
77+
/* the number of digits is limited by their placement. E.g.
7778
we avoid multiplying digits that will end up above the # of
7879
digits of precision requested
7980
*/
8081
pb = MIN (b->used, digs - ix);
8182

8283
for (iy = 0; iy < pb; iy++) {
83-
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
84+
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
8485
}
8586
}
8687

@@ -97,11 +98,12 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
9798
* correct result we must take the extra bits from each column and
9899
* carry them down
99100
*
100-
* Note that while this adds extra code to the multiplier it saves time
101-
* since the carry propagation is removed from the above nested loop.
102-
* This has the effect of reducing the work from N*(N+N*c)==N^2 + c*N^2 to
103-
* N^2 + N*c where c is the cost of the shifting. On very small numbers
104-
* this is slower but on most cryptographic size numbers it is faster.
101+
* Note that while this adds extra code to the multiplier it
102+
* saves time since the carry propagation is removed from the
103+
* above nested loop. This has the effect of reducing the work
104+
* from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the
105+
* cost of the shifting. On very small numbers this is slower
106+
* but on most cryptographic size numbers it is faster.
105107
*/
106108
tmpc = c->dp;
107109
for (ix = 1; ix < digs; ix++) {

bn_fast_s_mp_mul_high_digs.c

+12-4
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ int
2727
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2828
{
2929
int oldused, newused, res, pa, pb, ix;
30-
mp_word W[512];
30+
mp_word W[MP_WARRAY];
3131

3232
/* calculate size of product and allocate more space if required */
3333
newused = a->used + b->used + 1;
@@ -55,15 +55,23 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
5555

5656
/* alias for right side */
5757
tmpy = b->dp + iy;
58-
58+
5959
/* alias for the columns of output. Offset to be equal to or above the
6060
* smallest digit place requested
6161
*/
62-
_W = &(W[digs]);
62+
_W = W + digs;
63+
64+
/* skip cases below zero where ix > digs */
65+
if (iy < 0) {
66+
iy = abs(iy);
67+
tmpy += iy;
68+
_W += iy;
69+
iy = 0;
70+
}
6371

6472
/* compute column products for digits above the minimum */
6573
for (; iy < pb; iy++) {
66-
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
74+
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
6775
}
6876
}
6977
}

bn_fast_s_mp_sqr.c

+9-9
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* then the carries are computed. This has the effect of making a very simple
2121
* inner loop that is executed the most
2222
*
23-
* W2 represents the outer products and W the inner.
23+
* W2 represents the outer products and W the inner.
2424
*
2525
* A further optimization is made because the inner products are of the form
2626
* "A * B * 2". The *2 part does not need to be computed until the end which is
@@ -33,7 +33,7 @@ int
3333
fast_s_mp_sqr (mp_int * a, mp_int * b)
3434
{
3535
int olduse, newused, res, ix, pa;
36-
mp_word W2[512], W[512];
36+
mp_word W2[MP_WARRAY], W[MP_WARRAY];
3737

3838
/* calculate size of product and allocate as required */
3939
pa = a->used;
@@ -44,9 +44,9 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
4444
}
4545
}
4646

47-
/* zero temp buffer (columns)
47+
/* zero temp buffer (columns)
4848
* Note that there are two buffers. Since squaring requires
49-
* a outter and inner product and the inner product requires
49+
* an outer and inner product and the inner product requires
5050
* computing a product and doubling it (a relatively expensive
5151
* op to perform n^2 times if you don't have to) the inner and
5252
* outer products are computed in different buffers. This way
@@ -60,7 +60,7 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
6060
* values in W2 are only written in even locations which means
6161
* we can collapse the array to 256 words [and fixup the memset above]
6262
* provided we also fix up the summations below. Ideally
63-
* the fixup loop should be unrolled twice to handle the even/odd
63+
* the fixup loop should be unrolled twice to handle the even/odd
6464
* cases, and then a final step to handle odd cases [e.g. newused == odd]
6565
*
6666
* This will not only save ~8*256 = 2KB of stack but lower the number of
@@ -71,10 +71,10 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
7171
* the multiplication by two is done afterwards in the N loop.
7272
*/
7373
for (ix = 0; ix < pa; ix++) {
74-
/* compute the outer product
74+
/* compute the outer product
7575
*
76-
* Note that every outer product is computed
77-
* for a particular column only once which means that
76+
* Note that every outer product is computed
77+
* for a particular column only once which means that
7878
* there is no need to do a double precision addition
7979
*/
8080
W2[ix + ix] = ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
@@ -95,7 +95,7 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
9595

9696
/* inner products */
9797
for (iy = ix + 1; iy < pa; iy++) {
98-
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
98+
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
9999
}
100100
}
101101
}

0 commit comments

Comments
 (0)