Accept the new LBRY URL separators and fall back to the legacy ones.

New syntax:    name:claim_id   name*sequence   name$amount_order
Legacy syntax: name#claim_id   name:sequence   name$amount_order

_create_url_regex() gains a `legacy` flag that selects which claim pattern
is used. URL.parse() tries URL_REGEX first and URL_REGEX_LEGACY second,
while PathSegment.__str__() always emits the new separators. '*' is added
to the set of characters a claim name may not contain.

NOTE(review): because the new pattern is tried first, a legacy `name:N`
URL that is also valid in the new syntax is parsed with N as a claim-id
prefix, not as a sequence number.

diff --git a/lbry/schema/url.py b/lbry/schema/url.py
index 8f68600bd..fe6d73ff1 100644
--- a/lbry/schema/url.py
+++ b/lbry/schema/url.py
@@ -1,12 +1,12 @@
 import re
 import unicodedata
-from typing import NamedTuple, Tuple
+from typing import Iterable, NamedTuple, Pattern, Tuple, Union
 
 
-def _create_url_regex():
+def _create_url_regex(legacy=False):
     # see https://spec.lbry.com/ and test_url.py
     invalid_names_regex = \
-        r"[^=&#:$@%?;\"/\\<>%{}|^~`\[\]" \
+        r"[^=&#:$@%*?;\"/\\<>%{}|^~`\[\]" \
         r"\u0000-\u0020\uD800-\uDFFF\uFFFE-\uFFFF]+"
 
     def _named(name, regex):
@@ -18,29 +18,42 @@ def _create_url_regex():
     def _oneof(*choices):
         return _group('|'.join(choices))
 
+    def _legacy_claim(name, prefix=""):
+        return _group(
+            _named(name + "_name", prefix + invalid_names_regex) +
+            _oneof(
+                _group('#' + _named(name + "_claim_id", "[0-9a-f]{1,40}")),
+                _group(':' + _named(name + "_sequence", '[1-9][0-9]*')),
+                _group(r'\$' + _named(name + "_amount_order", '[1-9][0-9]*'))
+            ) + '?'
+        )
+
     def _claim(name, prefix=""):
         return _group(
             _named(name+"_name", prefix + invalid_names_regex) +
             _oneof(
-                _group('#' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
-                _group(':' + _named(name+"_sequence", '[1-9][0-9]*')),
+                _group(':' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
+                _group(r'\*' + _named(name+"_sequence", '[1-9][0-9]*')),
                 _group(r'\$' + _named(name+"_amount_order", '[1-9][0-9]*'))
             ) + '?'
         )
 
+    claim = _claim if not legacy else _legacy_claim
+
     return (
         '^' +
         _named("scheme", "lbry://") + '?' +
         _oneof(
-            _group(_claim("channel_with_stream", "@") + "/" + _claim("stream_in_channel")),
-            _claim("channel", "@"),
-            _claim("stream")
+            _group(claim("channel_with_stream", "@") + "/" + claim("stream_in_channel")),
+            claim("channel", "@"),
+            claim("stream")
         ) +
         '$'
     )
 
 
 URL_REGEX = _create_url_regex()
+URL_REGEX_LEGACY = _create_url_regex(legacy=True)
 
 
 def normalize_name(name):
@@ -69,9 +82,9 @@ class PathSegment(NamedTuple):
 
     def __str__(self):
         if self.claim_id is not None:
-            return f"{self.name}#{self.claim_id}"
+            return f"{self.name}:{self.claim_id}"
         elif self.sequence is not None:
-            return f"{self.name}:{self.sequence}"
+            return f"{self.name}*{self.sequence}"
         elif self.amount_order is not None:
             return f"{self.name}${self.amount_order}"
         return self.name
@@ -104,9 +117,13 @@ class URL(NamedTuple):
     def __str__(self):
         return f"lbry://{'/'.join(str(p) for p in self.parts)}"
 
+    @staticmethod
+    def _first_match(x: str, ptns: Iterable[Union[str, Pattern[str]]]):
+        return next(filter(None, (re.match(ptn, x) for ptn in ptns)), None)
+
     @classmethod
     def parse(cls, url):
-        match = re.match(URL_REGEX, url)
+        match = cls._first_match(url, (URL_REGEX, URL_REGEX_LEGACY))
 
         if match is None:
             raise ValueError('Invalid LBRY URL')
diff --git a/tests/unit/schema/test_url.py b/tests/unit/schema/test_url.py
index 838f010ff..aa2bd6191 100644
--- a/tests/unit/schema/test_url.py
+++ b/tests/unit/schema/test_url.py
@@ -42,19 +42,19 @@ class TestURLParsing(unittest.TestCase):
         url = self._assert_url
         # stream
         url('test', stream_name='test')
-        url('test:1', stream_name='test', stream_sequence='1')
+        url('test*1', stream_name='test', stream_sequence='1')
         url('test$1', stream_name='test', stream_amount_order='1')
-        url(f'test#{claim_id}', stream_name='test', stream_claim_id=claim_id)
+        url(f'test:{claim_id}', stream_name='test', stream_claim_id=claim_id)
         # channel
         url('@test', channel_name='@test')
-        url('@test:1', channel_name='@test', channel_sequence='1')
+        url('@test*1', channel_name='@test', channel_sequence='1')
         url('@test$1', channel_name='@test', channel_amount_order='1')
-        url(f'@test#{claim_id}', channel_name='@test', channel_claim_id=claim_id)
+        url(f'@test:{claim_id}', channel_name='@test', channel_claim_id=claim_id)
         # channel/stream
         url('lbry://@test/stuff', channel_name='@test', stream_name='stuff')
-        url('lbry://@test:1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff')
+        url('lbry://@test*1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff')
         url('lbry://@test$1/stuff', channel_name='@test', channel_amount_order='1', stream_name='stuff')
-        url(f'lbry://@test#{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff')
+        url(f'lbry://@test:{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff')
         # unicode regex edges
         _url = lambda name: url(name, stream_name=name)
         _url('\uD799')
@@ -104,10 +104,10 @@ class TestURLParsing(unittest.TestCase):
         fail("lbry://test@")
         fail("lbry://tes@t")
         fail(f"lbry://test:1#{claim_id}")
-        fail("lbry://test:0")
+        fail("lbry://test*0")
         fail("lbry://test$0")
         fail("lbry://test/path")
-        fail("lbry://@test1:1ab/fakepath")
+        fail("lbry://@test1*1ab/fakepath")
         fail("lbry://test:1:1:1")
         fail("whatever/lbry://test")
         fail("lbry://lbry://test")
@@ -115,5 +115,5 @@ class TestURLParsing(unittest.TestCase):
         fail("lbry://abc:0x123")
         fail("lbry://abc:0x123/page")
         fail("lbry://@test1#ABCDEF/fakepath")
-        fail("test:0001")
+        fail("test*0001")
         fail("lbry://@test1$1/fakepath?arg1&arg2&arg3")