Coverage Report

Created: 2024-08-21 05:08

/workdir/bitcoin/src/bech32.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2017, 2021 Pieter Wuille
2
// Copyright (c) 2021-2022 The Bitcoin Core developers
3
// Distributed under the MIT software license, see the accompanying
4
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6
#include <bech32.h>
7
#include <util/vector.h>
8
9
#include <array>
10
#include <assert.h>
11
#include <numeric>
12
#include <optional>
13
14
namespace bech32
15
{
16
17
namespace
18
{
19
20
typedef std::vector<uint8_t> data;
21
22
/** The Bech32 and Bech32m character set for encoding. */
23
const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
24
25
/** The Bech32 and Bech32m character set for decoding. */
26
const int8_t CHARSET_REV[128] = {
27
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30
    15, -1, 10, 17, 21, 20, 26, 30,  7,  5, -1, -1, -1, -1, -1, -1,
31
    -1, 29, -1, 24, 13, 25,  9,  8, 23, -1, 18, 22, 31, 27, 19, -1,
32
     1,  0,  3, 16, 11, 28, 12, 14,  6,  4,  2, -1, -1, -1, -1, -1,
33
    -1, 29, -1, 24, 13, 25,  9,  8, 23, -1, 18, 22, 31, 27, 19, -1,
34
     1,  0,  3, 16, 11, 28, 12, 14,  6,  4,  2, -1, -1, -1, -1, -1
35
};
36
37
/** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
38
 * The defining polynomial of the extension is x^2 + 9x + 23.
39
 * Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024),
40
 * that is, a generator of the field. Every non-zero element of the field can then be represented
41
 * as (e)^k for some power k.
42
 * The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024).
43
 * Conversely, GF1024_LOG contains the discrete logarithms of these powers, so
44
 * GF1024_LOG[GF1024_EXP[k]] == k.
45
 * The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */
46
constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables()
47
0
{
48
0
    // Build table for GF(32).
49
0
    // We use these tables to perform arithmetic in GF(32) below, when constructing the
50
0
    // tables for GF(1024).
51
0
    std::array<int8_t, 31> GF32_EXP{};
52
0
    std::array<int8_t, 32> GF32_LOG{};
53
0
54
0
    // fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1.
55
0
    // Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001.
56
0
    const int fmod = 41;
57
0
58
0
    // Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is,
59
0
    // 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial
60
0
    // b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod).
61
0
    // For example, 00001 = 1 is the multiplicative identity.
62
0
    GF32_EXP[0] = 1;
63
0
    GF32_LOG[0] = -1;
64
0
    GF32_LOG[1] = 0;
65
0
    int v = 1;
66
0
    for (int i = 1; i < 31; ++i) {
67
0
        // Multiplication by x is the same as shifting left by 1, as
68
0
        // every coefficient of the polynomial is moved up one place.
69
0
        v = v << 1;
70
0
        // If the polynomial now has an x^5 term, we subtract fmod from it
71
0
        // to remain working modulo fmod. Subtraction is the same as XOR in characteristic
72
0
        // 2 fields.
73
0
        if (v & 32) v ^= fmod;
74
0
        GF32_EXP[i] = v;
75
0
        GF32_LOG[v] = i;
76
0
    }
77
0
78
0
    // Build table for GF(1024)
79
0
    std::array<int16_t, 1023> GF1024_EXP{};
80
0
    std::array<int16_t, 1024> GF1024_LOG{};
81
0
82
0
    GF1024_EXP[0] = 1;
83
0
    GF1024_LOG[0] = -1;
84
0
    GF1024_LOG[1] = 0;
85
0
86
0
    // Each element v of GF(1024) is encoded as a 10 bit integer in the following way:
87
0
    // v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)).
88
0
    // The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element
89
0
    // a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v),
90
0
    // we compute (e)*(v) by multiplying in the following way:
91
0
    //
92
0
    // v0' = 23*v1
93
0
    // v1' = 9*v1 + v0
94
0
    // e*v = v1' || v0'
95
0
    //
96
0
    // Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32)
97
0
    // is done using the log/exp tables:
98
0
    // e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ]
99
0
    // for non-zero a and b.
100
0
101
0
    v = 1;
102
0
    for (int i = 1; i < 1023; ++i) {
103
0
        int v0 = v & 31;
104
0
        int v1 = v >> 5;
105
0
106
0
        int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0;
107
0
        int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0;
108
0
109
0
        v = v1n << 5 | v0n;
110
0
        GF1024_EXP[i] = v;
111
0
        GF1024_LOG[v] = i;
112
0
    }
113
0
114
0
    return std::make_pair(GF1024_EXP, GF1024_LOG);
115
0
}
116
117
constexpr auto tables = GenerateGFTables();
118
constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first;
119
constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second;
120
121
/* Determine the final constant to use for the specified encoding. */
122
0
uint32_t EncodingConstant(Encoding encoding) {
123
0
    assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M);
124
0
    return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3;
  Branch (124:12): [True: 0, False: 0]
125
0
}
126
127
/** This function will compute what 6 5-bit values to XOR into the last 6 input values, in order to
128
 *  make the checksum 0. These 6 values are packed together in a single 30-bit integer. The higher
129
 *  bits correspond to earlier values. */
130
uint32_t PolyMod(const data& v)
131
0
{
132
    // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an
133
    // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) =
134
    // 1*x^5 + v0*x^4 + v1*x^3 + v2*x^2 + v3*x + v4. The implicit 1 guarantees that
135
    // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...].
136
137
    // The output is a 30-bit integer whose 5-bit groups are the coefficients of the remainder of
138
    // v(x) mod g(x), where g(x) is the Bech32 generator,
139
    // x^6 + {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}. g(x) is chosen in such a way
140
    // that the resulting code is a BCH code, guaranteeing detection of up to 3 errors within a
141
    // window of 1023 characters. Among the various possible BCH codes, one was selected to in
142
    // fact guarantee detection of up to 4 errors within a window of 89 characters.
143
144
    // Note that the coefficients are elements of GF(32), here represented as decimal numbers
145
    // between {}. In this finite field, addition is just XOR of the corresponding numbers. For
146
    // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires
147
    // treating the bits of values themselves as coefficients of a polynomial over a smaller field,
148
    // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} =
149
    // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a
150
    // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}.
151
152
    // During the course of the loop below, `c` contains the bitpacked coefficients of the
153
    // polynomial constructed from just the values of v that were processed so far, mod g(x). In
154
    // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of
155
    // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value
156
    // for `c`.
157
158
    // The following Sage code constructs the generator used:
159
    //
160
    // B = GF(2) # Binary field
161
    // BP.<b> = B[] # Polynomials over the binary field
162
    // F_mod = b**5 + b**3 + 1
163
    // F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition
164
    // FP.<x> = F[] # Polynomials over GF(32)
165
    // E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23)
166
    // E.<e> = F.extension(E_mod) # GF(1024) extension field definition
167
    // for p in divisors(E.order() - 1): # Verify e has order 1023.
168
    //    assert((e**p == 1) == (p % 1023 == 0))
169
    // G = lcm([(e**i).minpoly() for i in range(997,1000)])
170
    // print(G) # Print out the generator
171
    //
172
    // It demonstrates that g(x) is the least common multiple of the minimal polynomials
173
    // of 3 consecutive powers (997,998,999) of a primitive element (e) of GF(1024).
174
    // That guarantees it is, in fact, the generator of a primitive BCH code with cycle
175
    // length 1023 and distance 4. See https://en.wikipedia.org/wiki/BCH_code for more details.
176
177
0
    uint32_t c = 1;
178
0
    for (const auto v_i : v) {
  Branch (178:25): [True: 0, False: 0]
179
        // We want to update `c` to correspond to a polynomial with one extra term. If the initial
180
        // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to
181
        // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to
182
        // process. Simplifying:
183
        // c'(x) = (f(x) * x + v_i) mod g(x)
184
        //         ((f(x) mod g(x)) * x + v_i) mod g(x)
185
        //         (c(x) * x + v_i) mod g(x)
186
        // If c(x) = c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5, we want to compute
187
        // c'(x) = (c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5) * x + v_i mod g(x)
188
        //       = c0*x^6 + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i mod g(x)
189
        //       = c0*(x^6 mod g(x)) + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i
190
        // If we call (x^6 mod g(x)) = k(x), this can be written as
191
        // c'(x) = (c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i) + c0*k(x)
192
193
        // First, determine the value of c0:
194
0
        uint8_t c0 = c >> 25;
195
196
        // Then compute c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i:
197
0
        c = ((c & 0x1ffffff) << 5) ^ v_i;
198
199
        // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be
200
        // computed using the following Sage code (continuing the code above):
201
        //
202
        // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^6), packed in hex integers.
203
        //     v = 0
204
        //     for coef in reversed((F.fetch_int(i)*(G % x**6)).coefficients(sparse=True)):
205
        //         v = v*32 + coef.integer_representation()
206
        //     print("0x%x" % v)
207
        //
208
0
        if (c0 & 1)  c ^= 0x3b6a57b2; //     k(x) = {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}
  Branch (208:13): [True: 0, False: 0]
209
0
        if (c0 & 2)  c ^= 0x26508e6d; //  {2}k(x) = {19}x^5 +  {5}x^4 +     x^3 +  {3}x^2 + {19}x + {13}
  Branch (209:13): [True: 0, False: 0]
210
0
        if (c0 & 4)  c ^= 0x1ea119fa; //  {4}k(x) = {15}x^5 + {10}x^4 +  {2}x^3 +  {6}x^2 + {15}x + {26}
  Branch (210:13): [True: 0, False: 0]
211
0
        if (c0 & 8)  c ^= 0x3d4233dd; //  {8}k(x) = {30}x^5 + {20}x^4 +  {4}x^3 + {12}x^2 + {30}x + {29}
  Branch (211:13): [True: 0, False: 0]
212
0
        if (c0 & 16) c ^= 0x2a1462b3; // {16}k(x) = {21}x^5 +     x^4 +  {8}x^3 + {24}x^2 + {21}x + {19}
  Branch (212:13): [True: 0, False: 0]
213
214
0
    }
215
0
    return c;
216
0
}
217
218
/** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the
219
 * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999.
220
 *
221
 * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed
222
 * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid
223
 * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all
224
 * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j).
225
 *
226
 * Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6
227
 *
228
 * To compute R((e)^j), we are really computing:
229
 * r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6
230
 *
231
 * Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed.
232
 * But even more than that, we can consider each coefficient as a bit-string.
233
 * For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then:
234
 * r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j)
235
 * where all the (2^i*(e)^j) are constants and can be precomputed.
236
 *
237
 * Then we just add each of these corresponding constants to our final value based on the
238
 * bit values b_i. This is exactly what is done in the Syndrome function below.
239
 */
240
0
constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() {
241
0
    std::array<uint32_t, 25> SYNDROME_CONSTS{};
242
0
    for (int k = 1; k < 6; ++k) {
243
0
        for (int shift = 0; shift < 5; ++shift) {
244
0
            int16_t b = GF1024_LOG.at(size_t{1} << shift);
245
0
            int16_t c0 = GF1024_EXP.at((997*k + b) % 1023);
246
0
            int16_t c1 = GF1024_EXP.at((998*k + b) % 1023);
247
0
            int16_t c2 = GF1024_EXP.at((999*k + b) % 1023);
248
0
            uint32_t c = c2 << 20 | c1 << 10 | c0;
249
0
            int ind = 5*(k-1) + shift;
250
0
            SYNDROME_CONSTS[ind] = c;
251
0
        }
252
0
    }
253
0
    return SYNDROME_CONSTS;
254
0
}
255
constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants();
256
257
/**
258
 * Syndrome returns the three values s_997, s_998, and s_999 described above,
259
 * packed into a 30-bit integer, where each group of 10 bits encodes one value.
260
 */
261
0
uint32_t Syndrome(const uint32_t residue) {
262
    // low is the first 5 bits, corresponding to the r6 in the residue
263
    // (the constant term of the polynomial).
264
0
    uint32_t low = residue & 0x1f;
265
266
    // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional.
267
0
    uint32_t result = low ^ (low << 10) ^ (low << 20);
268
269
    // Then for each following bit, we add the corresponding precomputed constant if the bit is 1.
270
    // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10
271
    // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in
272
    // GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999)
273
    // simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field.
274
0
    for (int i = 0; i < 25; ++i) {
  Branch (274:21): [True: 0, False: 0]
275
0
        result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0);
  Branch (275:20): [True: 0, False: 0]
276
0
    }
277
0
    return result;
278
0
}
279
280
/** Convert to lower case. */
281
inline unsigned char LowerCase(unsigned char c)
282
0
{
283
0
    return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c;
  Branch (283:13): [True: 0, False: 0]
  Branch (283:25): [True: 0, False: 0]
284
0
}
285
286
/** Return indices of invalid characters in a Bech32 string. */
287
bool CheckCharacters(const std::string& str, std::vector<int>& errors)
288
0
{
289
0
    bool lower = false, upper = false;
290
0
    for (size_t i = 0; i < str.size(); ++i) {
  Branch (290:24): [True: 0, False: 0]
291
0
        unsigned char c{(unsigned char)(str[i])};
292
0
        if (c >= 'a' && c <= 'z') {
  Branch (292:13): [True: 0, False: 0]
  Branch (292:25): [True: 0, False: 0]
293
0
            if (upper) {
  Branch (293:17): [True: 0, False: 0]
294
0
                errors.push_back(i);
295
0
            } else {
296
0
                lower = true;
297
0
            }
298
0
        } else if (c >= 'A' && c <= 'Z') {
  Branch (298:20): [True: 0, False: 0]
  Branch (298:32): [True: 0, False: 0]
299
0
            if (lower) {
  Branch (299:17): [True: 0, False: 0]
300
0
                errors.push_back(i);
301
0
            } else {
302
0
                upper = true;
303
0
            }
304
0
        } else if (c < 33 || c > 126) {
  Branch (304:20): [True: 0, False: 0]
  Branch (304:30): [True: 0, False: 0]
305
0
            errors.push_back(i);
306
0
        }
307
0
    }
308
0
    return errors.empty();
309
0
}
310
311
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values)
312
0
{
313
0
    data ret;
314
0
    ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE);
315
316
    /** Expand a HRP for use in checksum computation. */
317
0
    for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5);
  Branch (317:24): [True: 0, False: 0]
318
0
    ret.push_back(0);
319
0
    for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f);
  Branch (319:24): [True: 0, False: 0]
320
321
0
    ret.insert(ret.end(), values.begin(), values.end());
322
323
0
    return ret;
324
0
}
325
326
/** Verify a checksum. */
327
Encoding VerifyChecksum(const std::string& hrp, const data& values)
328
0
{
329
    // PolyMod computes what value to xor into the final values to make the checksum 0. However,
330
    // if we required that the checksum was 0, it would be the case that appending a 0 to a valid
331
    // list of values would result in a new valid list. For that reason, Bech32 requires the
332
    // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
333
    // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
334
0
    auto enc = PreparePolynomialCoefficients(hrp, values);
335
0
    const uint32_t check = PolyMod(enc);
336
0
    if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
  Branch (336:9): [True: 0, False: 0]
337
0
    if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
  Branch (337:9): [True: 0, False: 0]
338
0
    return Encoding::INVALID;
339
0
}
340
341
/** Create a checksum. */
342
data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
343
0
{
344
0
    auto enc = PreparePolynomialCoefficients(hrp, values);
345
0
    enc.insert(enc.end(), CHECKSUM_SIZE, 0x00);
346
0
    uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
347
0
    data ret(CHECKSUM_SIZE);
348
0
    for (size_t i = 0; i < CHECKSUM_SIZE; ++i) {
  Branch (348:24): [True: 0, False: 0]
349
        // Convert the 5-bit groups in mod to checksum values.
350
0
        ret[i] = (mod >> (5 * (5 - i))) & 31;
351
0
    }
352
0
    return ret;
353
0
}
354
355
} // namespace
356
357
/** Encode a Bech32 or Bech32m string. */
358
0
std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
359
    // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder
360
    // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the
361
    // result will always be invalid.
362
0
    for (const char& c : hrp) assert(c < 'A' || c > 'Z');
  Branch (362:24): [True: 0, False: 0]
363
364
0
    std::string ret;
365
0
    ret.reserve(hrp.size() + 1 + values.size() + CHECKSUM_SIZE);
366
0
    ret += hrp;
367
0
    ret += '1';
368
0
    for (const uint8_t& i : values) ret += CHARSET[i];
  Branch (368:27): [True: 0, False: 0]
369
0
    for (const uint8_t& i : CreateChecksum(encoding, hrp, values)) ret += CHARSET[i];
  Branch (369:27): [True: 0, False: 0]
370
0
    return ret;
371
0
}
372
373
/** Decode a Bech32 or Bech32m string. */
374
0
DecodeResult Decode(const std::string& str, CharLimit limit) {
375
0
    std::vector<int> errors;
376
0
    if (!CheckCharacters(str, errors)) return {};
  Branch (376:9): [True: 0, False: 0]
377
0
    size_t pos = str.rfind('1');
378
0
    if (str.size() > limit) return {};
  Branch (378:9): [True: 0, False: 0]
379
0
    if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
  Branch (379:9): [True: 0, False: 0]
  Branch (379:28): [True: 0, False: 0]
  Branch (379:40): [True: 0, False: 0]
380
0
        return {};
381
0
    }
382
0
    data values(str.size() - 1 - pos);
383
0
    for (size_t i = 0; i < str.size() - 1 - pos; ++i) {
  Branch (383:24): [True: 0, False: 0]
384
0
        unsigned char c = str[i + pos + 1];
385
0
        int8_t rev = CHARSET_REV[c];
386
387
0
        if (rev == -1) {
  Branch (387:13): [True: 0, False: 0]
388
0
            return {};
389
0
        }
390
0
        values[i] = rev;
391
0
    }
392
0
    std::string hrp;
393
0
    hrp.reserve(pos);
394
0
    for (size_t i = 0; i < pos; ++i) {
  Branch (394:24): [True: 0, False: 0]
395
0
        hrp += LowerCase(str[i]);
396
0
    }
397
0
    Encoding result = VerifyChecksum(hrp, values);
398
0
    if (result == Encoding::INVALID) return {};
  Branch (398:9): [True: 0, False: 0]
399
0
    return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)};
400
0
}
401
402
/** Find index of an incorrect character in a Bech32 string. */
403
0
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) {
404
0
    std::vector<int> error_locations{};
405
406
0
    if (str.size() > limit) {
  Branch (406:9): [True: 0, False: 0]
407
0
        error_locations.resize(str.size() - limit);
408
0
        std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit));
409
0
        return std::make_pair("Bech32 string too long", std::move(error_locations));
410
0
    }
411
412
0
    if (!CheckCharacters(str, error_locations)){
  Branch (412:9): [True: 0, False: 0]
413
0
        return std::make_pair("Invalid character or mixed case", std::move(error_locations));
414
0
    }
415
416
0
    size_t pos = str.rfind('1');
417
0
    if (pos == str.npos) {
  Branch (417:9): [True: 0, False: 0]
418
0
        return std::make_pair("Missing separator", std::vector<int>{});
419
0
    }
420
0
    if (pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
  Branch (420:9): [True: 0, False: 0]
  Branch (420:21): [True: 0, False: 0]
421
0
        error_locations.push_back(pos);
422
0
        return std::make_pair("Invalid separator position", std::move(error_locations));
423
0
    }
424
425
0
    std::string hrp;
426
0
    hrp.reserve(pos);
427
0
    for (size_t i = 0; i < pos; ++i) {
  Branch (427:24): [True: 0, False: 0]
428
0
        hrp += LowerCase(str[i]);
429
0
    }
430
431
0
    size_t length = str.size() - 1 - pos; // length of data part
432
0
    data values(length);
433
0
    for (size_t i = pos + 1; i < str.size(); ++i) {
  Branch (433:30): [True: 0, False: 0]
434
0
        unsigned char c = str[i];
435
0
        int8_t rev = CHARSET_REV[c];
436
0
        if (rev == -1) {
  Branch (436:13): [True: 0, False: 0]
437
0
            error_locations.push_back(i);
438
0
            return std::make_pair("Invalid Base 32 character", std::move(error_locations));
439
0
        }
440
0
        values[i - pos - 1] = rev;
441
0
    }
442
443
    // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors
444
    // We can't simply use the segwit version, because that may be one of the errors
445
0
    std::optional<Encoding> error_encoding;
446
0
    for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
  Branch (446:28): [True: 0, False: 0]
447
0
        std::vector<int> possible_errors;
448
        // Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial
449
        // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
450
0
        auto enc = PreparePolynomialCoefficients(hrp, values);
451
0
        uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding);
452
453
        // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
454
        // constant) should be 0 - hence 0 indicates there are no errors present.
455
0
        if (residue != 0) {
  Branch (455:13): [True: 0, False: 0]
456
            // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid
457
            // codeword (a multiple of G(x)), and E encodes the errors.
458
0
            uint32_t syn = Syndrome(residue);
459
460
            // Unpack the three 10-bit syndrome values
461
0
            int s0 = syn & 0x3FF;
462
0
            int s1 = (syn >> 10) & 0x3FF;
463
0
            int s2 = syn >> 20;
464
465
            // Get the discrete logs of these values in GF1024 for more efficient computation
466
0
            int l_s0 = GF1024_LOG.at(s0);
467
0
            int l_s1 = GF1024_LOG.at(s1);
468
0
            int l_s2 = GF1024_LOG.at(s2);
469
470
            // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1
471
            // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1)
472
            // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too.
473
            // Hence, s1^2 == s0*s2, which is exactly the condition we check first:
474
0
            if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) {
  Branch (474:17): [True: 0, False: 0]
  Branch (474:31): [True: 0, False: 0]
  Branch (474:45): [True: 0, False: 0]
  Branch (474:59): [True: 0, False: 0]
475
                // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023)
476
0
                size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive
477
                // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1,
478
                // so 1/((e)^997) = (e)^(1023-997).
479
0
                int l_e1 = l_s0 + (1023 - 997) * p1;
480
                // Finally, some sanity checks on the result:
481
                // - The error position should be within the length of the data
482
                // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements
483
                // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)).
484
0
                if (p1 < length && !(l_e1 % 33)) {
  Branch (484:21): [True: 0, False: 0]
  Branch (484:36): [True: 0, False: 0]
485
                    // Polynomials run from highest power to lowest, so the index p1 is from the right.
486
                    // We don't return e1 because it is dangerous to suggest corrections to the user,
487
                    // the user should check the address themselves.
488
0
                    possible_errors.push_back(str.size() - p1 - 1);
489
0
                }
490
            // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2.
491
0
            } else {
492
                // For all possible first error positions p1
493
0
                for (size_t p1 = 0; p1 < length; ++p1) {
  Branch (493:37): [True: 0, False: 0]
494
                    // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so
495
                    // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2.
496
                    //
497
                    // Consider s2 + s1*(e)^p1
498
                    //          = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
499
                    //          = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
500
                    //    (Because we are working in characteristic 2.)
501
                    //          = e2*(e)^(998*p2) ((e)^p2 + (e)^p1)
502
                    //
503
0
                    int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023));
  Branch (503:41): [True: 0, False: 0]
504
0
                    if (s2_s1p1 == 0) continue;
  Branch (504:25): [True: 0, False: 0]
505
0
                    int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1);
506
507
                    // Similarly, s1 + s0*(e)^p1
508
                    //          = e2*(e)^(997*p2) ((e)^p2 + (e)^p1)
509
0
                    int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023));
  Branch (509:41): [True: 0, False: 0]
510
0
                    if (s1_s0p1 == 0) continue;
  Branch (510:25): [True: 0, False: 0]
511
0
                    int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1);
512
513
                    // So, putting these together, we can compute the second error position as
514
                    // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1)
515
                    // p2 = log((e)^p2)
516
0
                    size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023;
517
518
                    // Sanity checks that p2 is a valid position and not the same as p1
519
0
                    if (p2 >= length || p1 == p2) continue;
  Branch (519:25): [True: 0, False: 0]
  Branch (519:41): [True: 0, False: 0]
520
521
                    // Now we want to compute the error values e1 and e2.
522
                    // Similar to above, we compute s1 + s0*(e)^p2
523
                    //          = e1*(e)^(997*p1) ((e)^p1 + (e)^p2)
524
0
                    int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023));
  Branch (524:41): [True: 0, False: 0]
525
0
                    if (s1_s0p2 == 0) continue;
  Branch (525:25): [True: 0, False: 0]
526
0
                    int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2);
527
528
                    // And compute (the log of) 1/((e)^p1 + (e)^p2))
529
0
                    int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2));
530
531
                    // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2)))
532
                    //         = e2*(e)^(997*p2)
533
                    // Then recover e2 by dividing by (e)^(997*p2)
534
0
                    int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2;
535
                    // Check that e2 is in GF(32)
536
0
                    if (l_e2 % 33) continue;
  Branch (536:25): [True: 0, False: 0]
537
538
                    // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2)))
539
                    //         = e1*(e)^(997*p1)
540
                    // So recover e1 by dividing by (e)^(997*p1)
541
0
                    int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1;
542
                    // Check that e1 is in GF(32)
543
0
                    if (l_e1 % 33) continue;
  Branch (543:25): [True: 0, False: 0]
544
545
                    // Again, we do not return e1 or e2 for safety.
546
                    // Order the error positions from the left of the string and return them
547
0
                    if (p1 > p2) {
  Branch (547:25): [True: 0, False: 0]
548
0
                        possible_errors.push_back(str.size() - p1 - 1);
549
0
                        possible_errors.push_back(str.size() - p2 - 1);
550
0
                    } else {
551
0
                        possible_errors.push_back(str.size() - p2 - 1);
552
0
                        possible_errors.push_back(str.size() - p1 - 1);
553
0
                    }
554
0
                    break;
555
0
                }
556
0
            }
557
0
        } else {
558
            // No errors
559
0
            return std::make_pair("", std::vector<int>{});
560
0
        }
561
562
0
        if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) {
  Branch (562:13): [True: 0, False: 0]
  Branch (562:41): [True: 0, False: 0]
  Branch (562:69): [True: 0, False: 0]
563
0
            error_locations = std::move(possible_errors);
564
0
            if (!error_locations.empty()) error_encoding = encoding;
  Branch (564:17): [True: 0, False: 0]
565
0
        }
566
0
    }
567
0
    std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum"
  Branch (567:33): [True: 0, False: 0]
568
0
                              : error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum"
  Branch (568:33): [True: 0, False: 0]
569
0
                              : "Invalid checksum";
570
571
0
    return std::make_pair(error_message, std::move(error_locations));
572
0
}
573
574
} // namespace bech32