1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
/* See LICENSE file for copyright and license details. */
#include "internals.h"
static inline void
zsqr_impl_single_char(z_t a, z_t b)
{
ENSURE_SIZE(a, 1);
a->used = 1;
a->chars[0] = b->chars[0] * b->chars[0];
SET_SIGNUM(a, 1);
}
extern void zmul_impl(z_t a, z_t b, z_t c);
void
zsqr_impl(z_t a, z_t b)
{
/*
* Karatsuba algorithm, optimised for equal factors.
*/
#define z2 a
z_t z0, z1, high, low;
zahl_char_t auxchars[3];
size_t bits;
bits = zbits(b);
if (bits <= BITS_PER_CHAR / 2) {
zsqr_impl_single_char(a, b);
return;
}
bits >>= 1;
/* Try to split only at a character level rather than a bit level.
* Such splits are faster, even if bit-level is required, and do
* not require auxiliary memory except for the bit-level split
* which require constant auxiliary memory. */
if (bits < BITS_PER_CHAR) {
low->chars = auxchars;
high->chars = auxchars + 1;
zsplit_unsigned_fast_small_auto(high, low, b, bits);
} else {
bits &= ~(BITS_PER_CHAR - 1);
zsplit_unsigned_fast_large_taint(high, low, b, bits);
}
if (unlikely(zzero(low))) {
zsqr_impl(z2, high);
zlsh(a, z2, bits << 1);
} else {
zinit(z0);
zinit(z1);
zsqr_impl(z0, low);
zmul_impl(z1, low, high);
zlsh(z1, z1, bits + 1);
zsqr_impl(z2, high);
zlsh(a, z2, bits << 1);
zadd_unsigned_assign(a, z1);
zadd_unsigned_assign(a, z0);
zfree(z0);
zfree(z1);
}
}
void
zsqr(z_t a, z_t b)
{
if (unlikely(zzero(b))) {
SET_SIGNUM(a, 0);
} else {
zsqr_impl(a, b);
SET_SIGNUM(a, 1);
}
}
|