mnemonic: implement Wordlist class

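Wordlist subclasses 'tuple', so it can be used transparently wherever a plain tuple of words was used before. 'in' and '.index()' are fast because they are backed by a word-to-index mapping instead of a linear scan. bip39_is_checksum_valid now uses Wordlist, which makes it faster.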
2025-08-23 17:47:31 +00:00 · 2020-02-29 00:20:11 +01:00 · 2020-02-29 00:20:11 +01:00 · a0b096dcb2
commit a0b096dcb2
parent e1dcdde272
5 changed files with 59 additions and 35 deletions
--- a/electrum/gui/kivy/uix/dialogs/installwizard.py
+++ b/electrum/gui/kivy/uix/dialogs/installwizard.py
@ -859,7 +859,7 @@ class RestoreSeedDialog(WizardDialog):
        super(RestoreSeedDialog, self).__init__(wizard, **kwargs)
        self._test = kwargs['test']
        from electrum.mnemonic import Mnemonic
-        from electrum.old_mnemonic import words as old_wordlist
+        from electrum.old_mnemonic import wordlist as old_wordlist
        self.words = set(Mnemonic('en').wordlist).union(set(old_wordlist))
        self.ids.text_input_seed.text = test_seed if is_test else ''
        self.message = _('Please type your seed phrase using the virtual keyboard.')
--- a/electrum/gui/qt/seed_dialog.py
+++ b/electrum/gui/qt/seed_dialog.py
@ -30,7 +30,7 @@ from PyQt5.QtWidgets import (QVBoxLayout, QCheckBox, QHBoxLayout, QLineEdit,
 from electrum.i18n import _
 from electrum.mnemonic import Mnemonic, seed_type
-import electrum.old_mnemonic
+from electrum import old_mnemonic
 from .util import (Buttons, OkButton, WWLabel, ButtonsTextEdit, icon_path,
                   EnterButton, CloseButton, WindowModalDialog, ColorScheme)
@ -150,7 +150,7 @@ class SeedLayout(QVBoxLayout):
    def initialize_completer(self):
        bip39_english_list = Mnemonic('en').wordlist
-        old_list = electrum.old_mnemonic.words
+        old_list = old_mnemonic.wordlist
        only_old_list = set(old_list) - set(bip39_english_list)
        self.wordlist = list(bip39_english_list) + list(only_old_list)  # concat both lists
        self.wordlist.sort()
--- a/electrum/keystore.py
+++ b/electrum/keystore.py
@ -42,7 +42,7 @@ from .crypto import (pw_decode, pw_encode, sha256, sha256d, PW_HASH_VERSION_LATE
                     SUPPORTED_PW_HASH_VERSIONS, UnsupportedPasswordHashVersion, hash_160)
 from .util import (InvalidPassword, WalletFileException,
                   BitcoinException, bh2u, bfh, inv_dict, is_hex_str)
-from .mnemonic import Mnemonic, load_wordlist, seed_type, is_seed
+from .mnemonic import Mnemonic, Wordlist, seed_type, is_seed
 from .plugin import run_hook
 from .logging import Logger
@ -811,7 +811,7 @@ def bip39_is_checksum_valid(mnemonic: str) -> Tuple[bool, bool]:
    """
    words = [ normalize('NFKD', word) for word in mnemonic.split() ]
    words_len = len(words)
-    wordlist = load_wordlist("english.txt")
+    wordlist = Wordlist.from_file("english.txt")
    n = len(wordlist)
    i = 0
    words.reverse()
--- a/electrum/mnemonic.py
+++ b/electrum/mnemonic.py
@ -27,6 +27,8 @@ import math
 import hashlib
 import unicodedata
 import string
 from typing import Sequence, Dict
 from types import MappingProxyType
 from .util import resource_path, bfh, bh2u, randrange
 from .crypto import hmac_oneshot
@ -88,28 +90,48 @@ def normalize_text(seed: str) -> str:
    return seed
-_WORDLIST_CACHE = {}
+_WORDLIST_CACHE = {}  # type: Dict[str, Wordlist]
-def load_wordlist(filename) -> tuple:
+class Wordlist(tuple):
    path = resource_path('wordlist', filename)
    if path not in _WORDLIST_CACHE:
        with open(path, 'r', encoding='utf-8') as f:
            s = f.read().strip()
        s = unicodedata.normalize('NFKD', s)
        lines = s.split('\n')
        wordlist = []
        for line in lines:
            line = line.split('#')[0]
            line = line.strip(' \r')
            assert ' ' not in line
            if line:
                wordlist.append(line)
-        # wordlists shouldn't be mutated, but just in case,
+    def __init__(self, words: Sequence[str]):
-        # convert it to a tuple
+        super().__init__()
-        _WORDLIST_CACHE[path] = tuple(wordlist)
+        index_from_word = {w: i for i, w in enumerate(words)}
-    return _WORDLIST_CACHE[path]
+        self._index_from_word = MappingProxyType(index_from_word)  # no mutation
    def index(self, word, start=None, stop=None) -> int:
        try:
            return self._index_from_word[word]
        except KeyError as e:
            raise ValueError from e
    def __contains__(self, word) -> bool:
        try:
            self.index(word)
        except ValueError:
            return False
        else:
            return True
    @classmethod
    def from_file(cls, filename) -> 'Wordlist':
        """Load the wordlist stored in *filename*, reusing a cached copy.

        The file is resolved with ``resource_path('wordlist', filename)``
        and parsed at most once per path; the result is kept in
        ``_WORDLIST_CACHE``. The text is NFKD-normalized, anything after a
        '#' on a line is dropped, and empty lines are skipped.
        """
        path = resource_path('wordlist', filename)
        if path in _WORDLIST_CACHE:
            return _WORDLIST_CACHE[path]

        with open(path, 'r', encoding='utf-8') as f:
            raw_text = f.read().strip()
        normalized = unicodedata.normalize('NFKD', raw_text)

        words = []
        for raw_line in normalized.split('\n'):
            # drop the trailing comment, then surrounding spaces / CR
            entry = raw_line.split('#')[0].strip(' \r')
            assert ' ' not in entry
            if not entry:
                continue
            words.append(entry)

        _WORDLIST_CACHE[path] = Wordlist(words)
        return _WORDLIST_CACHE[path]
 filenames = {
@ -130,8 +152,7 @@ class Mnemonic(Logger):
        lang = lang or 'en'
        self.logger.info(f'language {lang}')
        filename = filenames.get(lang[0:2], 'english.txt')
-        self.wordlist = load_wordlist(filename)
+        self.wordlist = Wordlist.from_file(filename)
        self.wordlist_indexes = {w: i for i, w in enumerate(self.wordlist)}
        self.logger.info(f"wordlist has {len(self.wordlist)} words")
    @classmethod
@ -162,11 +183,11 @@ class Mnemonic(Logger):
        i = 0
        while words:
            w = words.pop()
-            k = self.wordlist_indexes[w]
+            k = self.wordlist.index(w)
            i = i*n + k
        return i
-    def make_seed(self, seed_type=None, *, num_bits=132):
+    def make_seed(self, seed_type=None, *, num_bits=132) -> str:
        if seed_type is None:
            seed_type = 'segwit'
        prefix = version.seed_prefix(seed_type)
--- a/electrum/old_mnemonic.py
+++ b/electrum/old_mnemonic.py
@ -23,10 +23,12 @@
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 from .mnemonic import Wordlist
 # list of words from http://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/Contemporary_poetry
-words = (
+_words = (
 "like",
 "just",
 "love",
@ -1654,9 +1656,10 @@ words = (
 "weapon",
 "weary",
 )
 _words_indexes = {w: i for i, w in enumerate(words)}
-n = len(words)
+wordlist = Wordlist(_words)
 n = len(wordlist)
 assert n == 1626
@ -1672,7 +1675,7 @@ def mn_encode( message ):
        w1 = (x%n)
        w2 = ((x//n) + w1)%n
        w3 = ((x//n//n) + w2)%n
-        out += [ words[w1], words[w2], words[w3] ]
+        out += [ wordlist[w1], wordlist[w2], wordlist[w3] ]
    return out
@ -1680,9 +1683,9 @@ def mn_decode( wlist ):
    out = ''
    for i in range(len(wlist)//3):
        word1, word2, word3 = wlist[3*i:3*i+3]
-        w1 =  _words_indexes[word1]
+        w1 =  wordlist.index(word1)
-        w2 = (_words_indexes[word2]) % n
+        w2 = (wordlist.index(word2)) % n
-        w3 = (_words_indexes[word3]) % n
+        w3 = (wordlist.index(word3)) % n
        x = w1 +n*((w2-w1)%n) +n*n*((w3-w2)%n)
        out += '%08x'%x
    return out