1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
/* See LICENSE file for copyright and license details. */
#include "internals.h"
/*
 * Fast path for operands whose product fits in one character.
 *
 * Only the lowest character of `b` and of `c` is read; zmul_impl()
 * calls this only when the operands' combined bit count is at most
 * BITS_PER_CHAR. `a` becomes a one-character value with its signum
 * set to 1.
 */
static inline void
zmul_impl_single_char(z_t a, z_t b, z_t c)
{
	/* Make sure `a` has room for one character before storing into it. */
	ENSURE_SIZE(a, 1);

	/*
	 * Both factors are read before the store happens, so this stays
	 * correct when `a` is the same object as `b` or `c`.
	 */
	*a->chars = (*b->chars) * (*c->chars);

	SET_SIGNUM(a, 1);
	a->used = 1;
}
/*
 * Store the product of `b` and `c` in `a`, using recursive Karatsuba
 * multiplication.
 *
 * The sign of the result is not derived from the operands' signs here:
 * zmul() strips the signs before calling and applies the final signum
 * itself. This function only sets the signum to 0 directly (when an
 * operand is zero); everything else is left to the helpers it calls.
 *
 * `a` may be the same object as `b` and/or `c`: both operands are
 * copied into temporaries (or, on the single-character path, read
 * before the store) before `a` is written.
 */
void
zmul_impl(z_t a, z_t b, z_t c)
{
	/*
	 * Karatsuba algorithm
	 *
	 * Basically, this is how you were taught to multiply large numbers
	 * by hand in school: 4010⋅3020 = (4000 + 10)(3000 + 20) =
	 * = 40⋅30⋅10⁴ + (40⋅20 + 30⋅10)⋅10² + 10⋅20, but the middle is
	 * optimised to only one multiplication:
	 * 40⋅20 + 30⋅10 = (40 + 10)(30 + 20) − 40⋅30 − 10⋅20.
	 * This optimisation is crucial. Without it, the algorithm will
	 * run in O(n²).
	 */

	/*
	 * The three partial products do not get storage of their own:
	 *   z0 (low⋅low)   is built directly in the result `a`,
	 *   z1 (middle)    overwrites `b_low`,
	 *   z2 (high⋅high) overwrites `c_low`.
	 * The aliases are removed again with #undef right after the function.
	 */
#define z2 c_low
#define z1 b_low
#define z0 a

	size_t m, m2;
	z_t b_high, b_low, c_high, c_low;

	/* NOTE(review): zzero1(b, c) presumably means "b or c is zero" — confirm. */
	if (unlikely(zzero1(b, c))) {
		SET_SIGNUM(a, 0);
		return;
	}

	/* Bit lengths of the operands; when squaring, count only once. */
	m = zbits(b);
	m2 = b == c ? m : zbits(c);

	/*
	 * An m-bit number times an m2-bit number has at most m + m2 bits,
	 * so in this case the whole product fits in a single character.
	 */
	if (m + m2 <= BITS_PER_CHAR) {
		zmul_impl_single_char(a, b, c);
		return;
	}

	/* Split position: half the bit length of the longer operand. */
	m = MAX(m, m2);
	m2 = m >> 1;

	zinit_temp(b_high);
	zinit_temp(b_low);
	zinit_temp(c_high);
	zinit_temp(c_low);

	/*
	 * NOTE(review): zsplit(high, low, x, n) is assumed to yield
	 * x = high⋅2ⁿ + low — confirm against its definition.
	 */
	zsplit(b_high, b_low, b, m2);
	zsplit(c_high, c_low, c, m2);

	/* z0 = b_low⋅c_low, written straight into `a`. */
	zmul_impl(z0, b_low, c_low);

	/* The plain low halves are no longer needed: turn them into low + high. */
	zadd_unsigned_assign(b_low, b_high);
	zadd_unsigned_assign(c_low, c_high);

	/* z1 = (b_low + b_high)⋅(c_low + c_high); this overwrites b_low. */
	zmul_impl(z1, b_low, c_low);
	/* z2 = b_high⋅c_high; this overwrites c_low, which z1 no longer needs. */
	zmul_impl(z2, b_high, c_high);

	/* Reduce z1 to the middle term: z1 = z1 − z0 − z2. */
	zsub_nonnegative_assign(z1, z0);
	zsub_nonnegative_assign(z1, z2);

	/* Recombine: a = z0 + (z1 << m2) + (z2 << 2⋅m2). */
	zlsh(z1, z1, m2);
	m2 <<= 1;
	zlsh(z2, z2, m2);
	zadd_unsigned_assign(a, z1);
	zadd_unsigned_assign(a, z2);

	/* Temporaries are released in reverse order of their initialisation. */
	zfree_temp(c_low);
	zfree_temp(c_high);
	zfree_temp(b_low);
	zfree_temp(b_high);
}

/* Do not let the short alias names leak into the rest of the file. */
#undef z0
#undef z1
#undef z2
/*
 * Signed multiplication: set `a` to `b` times `c`.
 *
 * The magnitude is computed by zmul_impl() with the operands' sign
 * fields temporarily replaced by their squares; afterwards the sign
 * fields are multiplied by the saved values again and the signum of
 * `a` is set to the product of the operands' signums.
 */
void
zmul(z_t a, z_t b, z_t c)
{
	int saved_b_sign, saved_c_sign;

	/*
	 * Remember each sign and multiply the field by itself.
	 * NOTE(review): this assumes sign is always -1, 0 or +1, so that
	 * sign⋅sign is its absolute value — confirm.
	 */
	saved_b_sign = b->sign;
	b->sign *= saved_b_sign;
	saved_c_sign = c->sign;
	c->sign *= saved_c_sign;

	zmul_impl(a, b, c);

	/* Multiply the saved signs back in to undo the squaring above. */
	c->sign *= saved_c_sign;
	b->sign *= saved_b_sign;

	/* The result's signum is the product of the operands' signums. */
	SET_SIGNUM(a, zsignum(b) * zsignum(c));
}
|