From b2027cfd660077bdd5e8f64f2bc0aab8b4c8e9dd Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 23 Dec 2020 18:30:52 -0300 Subject: [PATCH 1/3] sync in batches of 10, clearing after --- lbry/wallet/ledger.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/lbry/wallet/ledger.py b/lbry/wallet/ledger.py index 5f646d826..c5dc64494 100644 --- a/lbry/wallet/ledger.py +++ b/lbry/wallet/ledger.py @@ -514,7 +514,6 @@ class Ledger(metaclass=LedgerRegistry): ) return True - synced_txs = [] to_request = {} pending_synced_history = {} already_synced = set() @@ -536,21 +535,14 @@ class Ledger(metaclass=LedgerRegistry): to_request[i] = (txid, remote_height) log.debug( - "request %i transactions, %i/%i for %s are already synced", len(to_request), len(synced_txs), + "request %i transactions, %i/%i for %s are already synced", len(to_request), len(already_synced), len(remote_history), address ) remote_history_txids = set(txid for txid, _ in remote_history) async for tx in self.request_synced_transactions(to_request, remote_history_txids, address): pending_synced_history[tx_indexes[tx.id]] = f"{tx.id}:{tx.height}:" - synced_txs.append(tx) - if len(synced_txs) >= 100: + if len(pending_synced_history) % 100 == 0: log.info("Syncing address %s: %d/%d", address, len(pending_synced_history), len(to_request)) - await self.db.save_transaction_io_batch( - synced_txs, address, self.address_to_hash160(address), "" - ) - while synced_txs: - tx = synced_txs.pop() - self._on_transaction_controller.add(TransactionEvent(address, tx)) log.info("Sync finished for address %s: %d/%d", address, len(pending_synced_history), len(to_request)) assert len(pending_synced_history) == len(remote_history), \ @@ -562,11 +554,7 @@ class Ledger(metaclass=LedgerRegistry): if f"{txid}:{height}:" != pending_synced_history[i]: log.warning("history mismatch: %s vs %s", remote_history[remote_i], pending_synced_history[i]) synced_history += pending_synced_history[i] - await self.db.save_transaction_io_batch( - synced_txs, address, self.address_to_hash160(address), synced_history - ) - while synced_txs: - self._on_transaction_controller.add(TransactionEvent(address, synced_txs.pop())) + await self.db.set_address_history(address, synced_history) if address_manager is None: address_manager = await self.get_address_manager_for_address(address) @@ -647,8 +635,10 @@ class Ledger(metaclass=LedgerRegistry): pending_sync = {} async for tx in self.request_transactions(((txid, height) for txid, height in to_request.values())): pending_sync[tx.id] = tx - for f in asyncio.as_completed([self._sync(tx, remote_history, pending_sync) for tx in pending_sync.values()]): - yield await f + yield tx + if len(pending_sync) > 10: + await self._sync_and_save_batch(address, remote_history, pending_sync) + await self._sync_and_save_batch(address, remote_history, pending_sync) async def _single_batch(self, batch, remote_heights): heights = {remote_heights[txid] for txid in batch} @@ -660,6 +650,14 @@ class Ledger(metaclass=LedgerRegistry): await self.maybe_verify_transaction(tx, remote_height, merkle) yield tx + async def _sync_and_save_batch(self, address, remote_history, pending_txs): + await asyncio.gather(*(self._sync(tx, remote_history, pending_txs) for tx in pending_txs.values())) + await self.db.save_transaction_io_batch( + pending_txs.values(), address, self.address_to_hash160(address), "" + ) + while pending_txs: + self._on_transaction_controller.add(TransactionEvent(address, pending_txs.popitem()[1])) + async def _sync(self, tx, remote_history, pending_txs): check_db_for_txos = {} for txi in tx.inputs: From 270da80d64d78e0e8797e63a35f4a9e2e48947a9 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 23 Dec 2020 19:10:19 -0300 Subject: [PATCH 2/3] return whole batches --- lbry/wallet/ledger.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lbry/wallet/ledger.py b/lbry/wallet/ledger.py index c5dc64494..a2b634bdf 100644 --- a/lbry/wallet/ledger.py +++ b/lbry/wallet/ledger.py @@ -605,6 +605,7 @@ class Ledger(metaclass=LedgerRegistry): header = await self.headers.get(remote_height) tx.position = merkle['pos'] tx.is_verified = merkle_root == header['merkle_root'] + return tx async def request_transactions(self, to_request: Tuple[Tuple[str, int], ...], cached=False): batches = [[]] @@ -614,7 +615,8 @@ class Ledger(metaclass=LedgerRegistry): if cached: if txid in self._tx_cache: if self._tx_cache[txid].tx is not None and self._tx_cache[txid].tx.is_verified: - yield self._tx_cache[txid].tx + tx = self._tx_cache[txid].tx + yield {tx.id: self._tx_cache[txid].tx} continue else: self._tx_cache[txid] = TransactionCacheItem() @@ -626,29 +628,29 @@ class Ledger(metaclass=LedgerRegistry): batches.pop() for batch in batches: - async for tx in self._single_batch(batch, remote_heights): - if cached: - self._tx_cache[tx.id].tx = tx - yield tx + txs = await self._single_batch(batch, remote_heights) + if cached: + for txid, tx in txs.items(): + self._tx_cache[txid].tx = tx + yield txs async def request_synced_transactions(self, to_request, remote_history, address): - pending_sync = {} - async for tx in self.request_transactions(((txid, height) for txid, height in to_request.values())): - pending_sync[tx.id] = tx - yield tx - if len(pending_sync) > 10: - await self._sync_and_save_batch(address, remote_history, pending_sync) - await self._sync_and_save_batch(address, remote_history, pending_sync) + async for txs in self.request_transactions(((txid, height) for txid, height in to_request.values())): + for tx in txs.values(): + yield tx + await self._sync_and_save_batch(address, remote_history, txs) async def _single_batch(self, batch, remote_heights): heights = {remote_heights[txid] for txid in batch} unrestriced = 0 < min(heights) < max(heights) < max(self.headers.checkpoints or [0]) batch_result = await self.network.retriable_call(self.network.get_transaction_batch, batch, not unrestriced) + txs = {} for txid, (raw, merkle) in batch_result.items(): remote_height = remote_heights[txid] tx = Transaction(unhexlify(raw), height=remote_height) + txs[tx.id] = tx await self.maybe_verify_transaction(tx, remote_height, merkle) - yield tx + return txs async def _sync_and_save_batch(self, address, remote_history, pending_txs): await asyncio.gather(*(self._sync(tx, remote_history, pending_txs) for tx in pending_txs.values())) @@ -757,7 +759,7 @@ class Ledger(metaclass=LedgerRegistry): txs: List[Transaction] = [] if len(outputs.txs) > 0: async for tx in self.request_transactions(tuple(outputs.txs), cached=True): - txs.append(tx) + txs.extend(tx.values()) _txos, blocked = outputs.inflate(txs) From adb418aafc55b5ced735141fb5aed683a52bd77c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Wed, 23 Dec 2020 21:37:20 -0300 Subject: [PATCH 3/3] group cache hits in a single dict --- lbry/wallet/ledger.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lbry/wallet/ledger.py b/lbry/wallet/ledger.py index a2b634bdf..4dee98dc1 100644 --- a/lbry/wallet/ledger.py +++ b/lbry/wallet/ledger.py @@ -610,13 +610,13 @@ class Ledger(metaclass=LedgerRegistry): async def request_transactions(self, to_request: Tuple[Tuple[str, int], ...], cached=False): batches = [[]] remote_heights = {} + cache_hits = set() for txid, height in sorted(to_request, key=lambda x: x[1]): if cached: if txid in self._tx_cache: if self._tx_cache[txid].tx is not None and self._tx_cache[txid].tx.is_verified: - tx = self._tx_cache[txid].tx - yield {tx.id: self._tx_cache[txid].tx} + cache_hits.add(txid) continue else: self._tx_cache[txid] = TransactionCacheItem() @@ -626,6 +626,8 @@ class Ledger(metaclass=LedgerRegistry): batches[-1].append(txid) if not batches[-1]: batches.pop() + if cached and cache_hits: + yield {txid: self._tx_cache[txid].tx for txid in cache_hits} for batch in batches: txs = await self._single_batch(batch, remote_heights)