Mirror of https://github.com/LBRYFoundation/lbry-sdk.git (synced 2025-08-23 17:27:25 +00:00)
make better resolve cache

commit 2641a9abe5 (parent: 6b193ab350)
2 changed files with 66 additions and 36 deletions

@@ -55,6 +55,14 @@ class PathSegment(NamedTuple):
     def normalized(self):
         return normalize_name(self.name)
 
+    @property
+    def is_shortid(self):
+        return self.claim_id is not None and len(self.claim_id) < 40
+
+    @property
+    def is_fullid(self):
+        return self.claim_id is not None and len(self.claim_id) == 40
+
     def to_dict(self):
         q = {'name': self.name}
         if self.claim_id is not None:
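For illustration only (not part of the commit): assuming lbry.schema.url.URL.parse handles this hypothetical URL the way it does elsewhere in the codebase, the new properties classify a path segment by the length of its hex claim-id prefix.

    from lbry.schema.url import URL

    # hypothetical URL: 2-character short id for the channel, full 40-character id for the stream
    url = URL.parse('lbry://@channel#ab/stream#abcdef1234567890abcdef1234567890abcdef12')
    assert url.channel.is_shortid       # 'ab' is shorter than 40 characters
    assert url.stream.is_fullid         # exactly 40 characters
    assert not url.stream.is_shortid
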
@@ -9,7 +9,7 @@ from elasticsearch import AsyncElasticsearch, NotFoundError, ConnectionError
 from elasticsearch.helpers import async_streaming_bulk
 
 from lbry.crypto.base58 import Base58
-from lbry.error import ResolveCensoredError, claim_id
+from lbry.error import ResolveCensoredError, claim_id as parse_claim_id
 from lbry.schema.result import Outputs, Censor
 from lbry.schema.tags import clean_tags
 from lbry.schema.url import URL, normalize_name

@@ -24,8 +24,8 @@ class SearchIndex:
         self.index = index_prefix + 'claims'
         self.sync_timeout = 600  # wont hit that 99% of the time, but can hit on a fresh import
         self.logger = class_logger(__name__, self.__class__.__name__)
-        self.search_cache = LRUCache(2 ** 16)
-        self.channel_cache = LRUCache(2 ** 16)
+        self.claim_cache = LRUCache(2 ** 15)  # invalidated on touched
+        self.short_id_cache = LRUCache(2 ** 17)  # never invalidated, since short ids are forever
 
     async def start(self):
         if self.client:
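The new caches rely on a small LRUCache surface: get/set, item assignment, pop, and membership tests. A minimal stand-in with that interface, shown purely for reference (the real LRUCache is provided elsewhere in the codebase and may differ):

    from collections import OrderedDict

    class MiniLRUCache:
        # Reference sketch of the assumed interface; not the class used above.
        def __init__(self, capacity):
            self.capacity = capacity
            self._data = OrderedDict()

        def get(self, key, default=None):
            if key in self._data:
                self._data.move_to_end(key)      # mark as most recently used
                return self._data[key]
            return default

        def set(self, key, value):
            self._data[key] = value
            self._data.move_to_end(key)
            if len(self._data) > self.capacity:
                self._data.popitem(last=False)   # evict least recently used

        def pop(self, key, default=None):
            return self._data.pop(key, default)

        def __contains__(self, key):
            return key in self._data

        __setitem__ = set
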
@@ -97,11 +97,18 @@ class SearchIndex:
 
     async def claim_consumer(self, claim_producer):
         await self.client.indices.refresh(self.index)
+        touched = set()
         async for ok, item in async_streaming_bulk(self.client, self._consume_claim_producer(claim_producer)):
             if not ok:
                 self.logger.warning("indexing failed for an item: %s", item)
+            else:
+                item = item.popitem()[1]
+                touched.add(item['_id'])
         await self.client.indices.refresh(self.index)
         await self.client.indices.flush(self.index)
+        for claim_id in touched:
+            if claim_id in self.claim_cache:
+                self.claim_cache.pop(claim_id)
         self.logger.info("Indexing done.")
 
     async def apply_filters(self, blocked_streams, blocked_channels, filtered_streams, filtered_channels):
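Why item.popitem()[1]['_id'] yields the claim id: per the elasticsearch helpers' documented result shape, each streaming-bulk result is a single-entry dict keyed by the operation type, and the per-document info carries the document _id. A rough sketch of that shape (field values are illustrative only):

    # one (ok, item) pair as yielded by async_streaming_bulk
    ok = True
    item = {'index': {'_index': 'claims', '_id': 'abc123...', 'result': 'updated', 'status': 200}}
    op_type, info = item.popitem()   # ('index', {...})
    assert info['_id'] == 'abc123...'
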
@@ -112,6 +119,9 @@ class SearchIndex:
             update = expand_query(channel_id__in=list(blockdict.keys()), censor_type=f"<{censor_type}")
         else:
             update = expand_query(claim_id__in=list(blockdict.keys()), censor_type=f"<{censor_type}")
+        for claim_id in blockdict:
+            if claim_id in self.claim_cache:
+                self.claim_cache.pop(claim_id)
         key = 'channel_id' if channels else 'claim_id'
         update['script'] = {
             "source": f"ctx._source.censor_type={censor_type}; ctx._source.censoring_channel_hash=params[ctx._source.{key}]",

@@ -135,8 +145,6 @@ class SearchIndex:
         await self.client.indices.refresh(self.index)
         await self.client.update_by_query(self.index, body=make_query(2, blocked_channels, True), slices=32)
         await self.client.indices.refresh(self.index)
-        self.search_cache.clear()
-        self.channel_cache.clear()
 
     async def delete_above_height(self, height):
         await self.client.delete_by_query(self.index, expand_query(height='>'+str(height)))

@@ -168,15 +176,32 @@ class SearchIndex:
         return results, censored, censor
 
     async def get_many(self, *claim_ids):
-        cached = {claim_id: self.search_cache.get(claim_id) for claim_id in claim_ids if claim_id in self.search_cache}
-        missing = [claim_id for claim_id in claim_ids if claim_id not in cached]
+        missing = [claim_id for claim_id in claim_ids if claim_id not in self.claim_cache]
         if missing:
             results = await self.client.mget(index=self.index, body={"ids": missing},
                                              _source_excludes=['description', 'title'])
             results = expand_result(filter(lambda doc: doc['found'], results["docs"]))
             for result in results:
-                self.search_cache.set(result['claim_id'], result)
-        return list(filter(None, map(self.search_cache.get, claim_ids)))
+                self.claim_cache.set(result['claim_id'], result)
+        return list(filter(None, map(self.claim_cache.get, claim_ids)))
+
+    async def full_id_from_short_id(self, name, short_id, channel_id=None):
+        key = (channel_id or '') + name + short_id
+        if key not in self.short_id_cache:
+            query = {'name': name, 'claim_id': short_id}
+            if channel_id:
+                query['channel_id'] = channel_id
+                query['order_by'] = ['^channel_join']
+                query['signature_valid'] = True
+            else:
+                query['order_by'] = '^creation_height'
+            result, _, _ = await self.search(**query, limit=1)
+            if len(result) == 1:
+                result = result[0]['claim_id']
+                self.short_id_cache[key] = result
+        return self.short_id_cache.get(key, None)
 
     async def search(self, **kwargs):
         if 'channel' in kwargs:
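A hedged usage sketch (the instance name and arguments are illustrative): resolving a two-character short id to a full claim id. The first call queries Elasticsearch; repeat calls for the same (channel, name, short id) key are served from short_id_cache.

    # inside an async context, given a started SearchIndex instance, here called `index`
    full_id = await index.full_id_from_short_id('some-stream', 'ab')        # queries Elasticsearch
    full_id_again = await index.full_id_from_short_id('some-stream', 'ab')  # served from short_id_cache
    assert full_id == full_id_again
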
@@ -217,23 +242,24 @@ class SearchIndex:
     async def resolve_channel_id(self, url: URL):
         if not url.has_channel:
             return
-        key = 'cid:' + str(url.channel)
-        if key in self.channel_cache:
-            return self.channel_cache[key]
+        if url.channel.is_fullid:
+            return url.channel.claim_id
+        if url.channel.is_shortid:
+            channel_id = await self.full_id_from_short_id(url.channel.name, url.channel.claim_id)
+            if not channel_id:
+                return LookupError(f'Could not find channel in "{url}".')
+            return channel_id
+
         query = url.channel.to_dict()
         if set(query) == {'name'}:
             query['is_controlling'] = True
         else:
             query['order_by'] = ['^creation_height']
-        if len(query.get('claim_id', '')) != 40:
-            matches, _, _ = await self.search(**query, limit=1)
-            if matches:
-                channel_id = matches[0]['claim_id']
-            else:
-                return LookupError(f'Could not find channel in "{url}".')
-        else:
-            channel_id = query['claim_id']
-        self.channel_cache.set(key, channel_id)
+        matches, _, _ = await self.search(**query, limit=1)
+        if matches:
+            channel_id = matches[0]['claim_id']
+        else:
+            return LookupError(f'Could not find channel in "{url}".')
         return channel_id
 
     async def resolve_stream(self, url: URL, channel_id: str = None):
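Note that resolve_channel_id returns a LookupError instance rather than raising it, both on the short-id path and on the search path, so a caller is expected to type-check the result. An illustrative sketch (the instance name is hypothetical, not part of the diff):

    channel_id = await index.resolve_channel_id(url)
    if isinstance(channel_id, LookupError):
        ...  # surface as a resolve error for this URL
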
@@ -242,14 +268,14 @@ class SearchIndex:
         if url.has_channel and channel_id is None:
             return None
         query = url.stream.to_dict()
-        stream = None
-        if 'claim_id' in query and len(query['claim_id']) == 40:
-            stream = (await self.get_many(query['claim_id']))
-            stream = stream[0] if len(stream) else None
-        else:
-            key = (channel_id or '') + str(url.stream)
-            if key in self.search_cache:
-                return self.search_cache[key]
+        if url.stream.claim_id is not None:
+            if url.stream.is_fullid:
+                claim_id = url.stream.claim_id
+            else:
+                claim_id = await self.full_id_from_short_id(query['name'], query['claim_id'], channel_id)
+            stream = await self.get_many(claim_id)
+            return stream[0] if len(stream) else None
+
         if channel_id is not None:
             if set(query) == {'name'}:
                 # temporarily emulate is_controlling for claims in channel
@@ -260,19 +286,15 @@ class SearchIndex:
             query['signature_valid'] = True
         elif set(query) == {'name'}:
             query['is_controlling'] = True
-        if not stream:
-            matches, _, _ = await self.search(**query, limit=1)
-            if matches:
-                stream = matches[0]
-            key = (channel_id or '') + str(url.stream)
-            self.search_cache.set(key, stream)
-        return stream
+        matches, _, _ = await self.search(**query, limit=1)
+        if matches:
+            return matches[0]
 
     async def _get_referenced_rows(self, txo_rows: List[dict]):
         txo_rows = [row for row in txo_rows if isinstance(row, dict)]
         repost_hashes = set(filter(None, map(itemgetter('reposted_claim_id'), txo_rows)))
         channel_hashes = set(filter(None, (row['channel_id'] for row in txo_rows)))
-        channel_hashes |= set(map(claim_id, filter(None, (row['censoring_channel_hash'] for row in txo_rows))))
+        channel_hashes |= set(map(parse_claim_id, filter(None, (row['censoring_channel_hash'] for row in txo_rows))))
 
         reposted_txos = []
         if repost_hashes:
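Taken together, resolving a URL would flow roughly as below. This is a sketch under the assumption that a higher-level caller wires the two methods together; that caller is not part of this diff, and the names are illustrative.

    url = URL.parse(raw_url)
    channel_id = await index.resolve_channel_id(url)
    if isinstance(channel_id, LookupError):
        return channel_id                      # could not resolve the channel part
    stream = await index.resolve_stream(url, channel_id)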