package server

import (
    "context"
    "encoding/hex"
    "log"
    "math"
    "reflect"
    "strings"

    "github.com/btcsuite/btcutil/base58"
    "github.com/golang/protobuf/ptypes/wrappers"
    pb "github.com/lbryio/hub/protobuf/go"
    "github.com/lbryio/hub/util"
    "github.com/olivere/elastic/v7"
    "golang.org/x/text/cases"
    "golang.org/x/text/language"
    "gopkg.in/karalabe/cookiejar.v1/collections/deque"
)

// record mirrors the fields of a claim document in the Elasticsearch index.
type record struct {
    Txid                 string  `json:"tx_id"`
    Nout                 uint32  `json:"tx_nout"`
    Height               uint32  `json:"height"`
    ClaimId              string  `json:"claim_id"`
    ChannelId            string  `json:"channel_id"`
    RepostedClaimId      string  `json:"reposted_claim_id"`
    CensorType           uint32  `json:"censor_type"`
    CensoringChannelHash string  `json:"censoring_channel_hash"`
    ShortUrl             string  `json:"short_url"`
    CanonicalUrl         string  `json:"canonical_url"`
    IsControlling        bool    `json:"is_controlling"`
    TakeOverHeight       uint32  `json:"take_over_height"`
    CreationHeight       uint32  `json:"creation_height"`
    ActivationHeight     uint32  `json:"activation_height"`
    ExpirationHeight     uint32  `json:"expiration_height"`
    ClaimsInChannel      uint32  `json:"claims_in_channel"`
    Reposted             uint32  `json:"reposted"`
    EffectiveAmount      uint64  `json:"effective_amount"`
    SupportAmount        uint64  `json:"support_amount"`
    TrendingGroup        uint32  `json:"trending_group"`
    TrendingMixed        float32 `json:"trending_mixed"`
    TrendingLocal        float32 `json:"trending_local"`
    TrendingGlobal       float32 `json:"trending_global"`
    Name                 string  `json:"name"`
}

// orderField is a parsed sort specification: the ES field name and direction.
type orderField struct {
    Field string
    IsAsc bool
}

// StrArrToInterface converts a string slice into the []interface{} form that
// the elastic query builders expect.
func StrArrToInterface(arr []string) []interface{} {
    searchVals := make([]interface{}, len(arr))
    for i := 0; i < len(arr); i++ {
        searchVals[i] = arr[i]
    }
    return searchVals
}

// AddTermsField adds a "terms" clause (match any of the values) for name.
func AddTermsField(arr []string, name string, q *elastic.BoolQuery) *elastic.BoolQuery {
    if len(arr) == 0 {
        return q
    }
    searchVals := StrArrToInterface(arr)
    return q.Must(elastic.NewTermsQuery(name, searchVals...))
}

// AddIndividualTermFields adds one "term" clause per value (match all of the
// values), negated when invert is set.
func AddIndividualTermFields(arr []string, name string, q *elastic.BoolQuery, invert bool) *elastic.BoolQuery {
    for _, x := range arr {
        if invert {
            q = q.MustNot(elastic.NewTermQuery(name, x))
        } else {
            q = q.Must(elastic.NewTermQuery(name, x))
        }
    }
    return q
}

// AddRangeField translates a pb.RangeField into a term, terms, or range
// clause depending on the operator and number of values.
func AddRangeField(rq *pb.RangeField, name string, q *elastic.BoolQuery) *elastic.BoolQuery {
    if rq == nil {
        return q
    }

    if len(rq.Value) > 1 {
        // Multiple values only make sense for equality matching.
        if rq.Op != pb.RangeField_EQ {
            return q
        }
        return AddTermsField(rq.Value, name, q)
    }

    switch rq.Op {
    case pb.RangeField_EQ:
        return q.Must(elastic.NewTermQuery(name, rq.Value[0]))
    case pb.RangeField_LT:
        return q.Must(elastic.NewRangeQuery(name).Lt(rq.Value[0]))
    case pb.RangeField_LTE:
        return q.Must(elastic.NewRangeQuery(name).Lte(rq.Value[0]))
    case pb.RangeField_GT:
        return q.Must(elastic.NewRangeQuery(name).Gt(rq.Value[0]))
    default: // pb.RangeField_GTE
        return q.Must(elastic.NewRangeQuery(name).Gte(rq.Value[0]))
    }
}

// AddInvertibleField adds a terms clause for field.Value, negated when
// field.Invert is set. Inverted channel_id filters also exclude the channel
// documents themselves by _id.
func AddInvertibleField(field *pb.InvertibleField, name string, q *elastic.BoolQuery) *elastic.BoolQuery {
    if field == nil {
        return q
    }
    searchVals := StrArrToInterface(field.Value)
    if field.Invert {
        q = q.MustNot(elastic.NewTermsQuery(name, searchVals...))
        if name == "channel_id.keyword" {
            q = q.MustNot(elastic.NewTermsQuery("_id", searchVals...))
        }
        return q
    }
    return q.Must(elastic.NewTermsQuery(name, searchVals...))
}
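// exampleFilterQuery is an illustrative sketch (not called anywhere) of how
// the helpers above compose: each appends a clause to the same bool query and
// returns it. The field names and values here are hypothetical.
func exampleFilterQuery() *elastic.BoolQuery {
    q := elastic.NewBoolQuery()
    q = AddTermsField([]string{"video/mp4"}, "media_type.keyword", q)
    q = AddIndividualTermFields([]string{"nsfw"}, "tags.keyword", q, true) // exclude a tag
    q = AddRangeField(&pb.RangeField{Op: pb.RangeField_GTE, Value: []string{"400000"}}, "height", q)
    return q
}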
// normalizeTag lowercases a tag, strips apostrophes, collapses runs of
// unwanted characters and whitespace, and trims the result.
func (s *Server) normalizeTag(tag string) string {
    c := cases.Lower(language.English)
    res := s.MultiSpaceRe.ReplaceAll(
        s.WeirdCharsRe.ReplaceAll(
            []byte(strings.TrimSpace(strings.Replace(c.String(tag), "'", "", -1))),
            []byte(" ")),
        []byte(" "))
    return string(res)
}

// cleanTags normalizes every tag in the slice via normalizeTag.
func (s *Server) cleanTags(tags []string) []string {
    cleanedTags := make([]string, len(tags))
    for i, tag := range tags {
        cleanedTags[i] = s.normalizeTag(tag)
    }
    return cleanedTags
}

// Search builds an Elasticsearch bool query from the SearchRequest, executes
// it, and post-processes the hits (blocking, deduplication, per-channel
// limits) before paginating the results into pb.Outputs.
func (s *Server) Search(ctx context.Context, in *pb.SearchRequest) (*pb.Outputs, error) {
    var client *elastic.Client
    if s.EsClient == nil {
        esUrl := s.Args.EsHost + ":" + s.Args.EsPort
        tmpClient, err := elastic.NewClient(elastic.SetURL(esUrl), elastic.SetSniff(false))
        if err != nil {
            return nil, err
        }
        client = tmpClient
        s.EsClient = client
    } else {
        client = s.EsClient
    }

    claimTypes := map[string]int{
        "stream":     1,
        "channel":    2,
        "repost":     3,
        "collection": 4,
    }

    streamTypes := map[string]int{
        "video":    1,
        "audio":    2,
        "image":    3,
        "document": 4,
        "binary":   5,
        "model":    6,
    }

    // Request field names that differ from the names in the ES mapping.
    replacements := map[string]string{
        "name":       "normalized",
        "txid":       "tx_id",
        "claim_hash": "_id",
    }

    // Fields indexed as text; sorting and exact matching use their ".keyword"
    // sub-field instead.
    textFields := map[string]bool{
        "author":            true,
        "canonical_url":     true,
        "channel_id":        true,
        "claim_name":        true,
        "description":       true,
        "claim_id":          true,
        "media_type":        true,
        "normalized":        true,
        "public_key_bytes":  true,
        "public_key_hash":   true,
        "short_url":         true,
        "signature":         true,
        "signature_digest":  true,
        "stream_type":       true,
        "title":             true,
        "tx_id":             true,
        "fee_currency":      true,
        "reposted_claim_id": true,
        "tags":              true,
    }

    var from = 0
    var pageSize = 10
    var orderBy []orderField

    q := elastic.NewBoolQuery()

    if in.IsControlling != nil {
        q = q.Must(elastic.NewTermQuery("is_controlling", in.IsControlling.Value))
    }

    if in.AmountOrder != nil {
        // AmountOrder asks for the single claim at that rank when results are
        // ordered by effective_amount.
        in.Limit = &wrappers.Int32Value{Value: 1}
        in.OrderBy = []string{"effective_amount"}
        in.Offset = &wrappers.Int32Value{Value: in.AmountOrder.Value - 1}
    }

    if in.Limit != nil {
        pageSize = int(in.Limit.Value)
    }

    if in.Offset != nil {
        from = int(in.Offset.Value)
    }

    if len(in.Name) > 0 {
        normalized := make([]string, len(in.Name))
        for i := 0; i < len(in.Name); i++ {
            normalized[i] = util.Normalize(in.Name[i])
        }
        in.Normalized = normalized
    }

    // Parse the order-by clauses: a leading '^' means ascending, legacy field
    // names are mapped to their ES equivalents, and text fields sort on their
    // ".keyword" sub-field.
    for _, x := range in.OrderBy {
        var toAppend string
        var isAsc = false
        if x[0] == '^' {
            isAsc = true
            x = x[1:]
        }
        if replacement, ok := replacements[x]; ok {
            toAppend = replacement
        } else {
            toAppend = x
        }
        if _, ok := textFields[toAppend]; ok {
            toAppend = toAppend + ".keyword"
        }
        orderBy = append(orderBy, orderField{toAppend, isAsc})
    }

    if len(in.ClaimType) > 0 {
        searchVals := make([]interface{}, len(in.ClaimType))
        for i := 0; i < len(in.ClaimType); i++ {
            searchVals[i] = claimTypes[in.ClaimType[i]]
        }
        q = q.Must(elastic.NewTermsQuery("claim_type", searchVals...))
    }

    if len(in.StreamType) > 0 {
        searchVals := make([]interface{}, len(in.StreamType))
        for i := 0; i < len(in.StreamType); i++ {
            searchVals[i] = streamTypes[in.StreamType[i]]
        }
        q = q.Must(elastic.NewTermsQuery("stream_type", searchVals...))
    }

    if len(in.XId) > 0 {
        searchVals := make([]interface{}, len(in.XId))
        for i := 0; i < len(in.XId); i++ {
            util.ReverseBytes(in.XId[i])
            // Ids are indexed as plain hex strings.
            searchVals[i] = hex.EncodeToString(in.XId[i])
        }
        if len(in.XId) == 1 && len(in.XId[0]) < 20 {
            q = q.Must(elastic.NewPrefixQuery("_id", string(in.XId[0])))
        } else {
            q = q.Must(elastic.NewTermsQuery("_id", searchVals...))
        }
    }
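    // A single claim id shorter than a full-length id is treated as a prefix
    // below: e.g. a hypothetical ClaimId{Value: []string{"abc"}} matches any
    // claim whose id begins with "abc", while longer values use exact matching.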
    if in.ClaimId != nil {
        searchVals := StrArrToInterface(in.ClaimId.Value)
        if len(in.ClaimId.Value) == 1 && len(in.ClaimId.Value[0]) < 20 {
            if in.ClaimId.Invert {
                q = q.MustNot(elastic.NewPrefixQuery("claim_id.keyword", in.ClaimId.Value[0]))
            } else {
                q = q.Must(elastic.NewPrefixQuery("claim_id.keyword", in.ClaimId.Value[0]))
            }
        } else {
            if in.ClaimId.Invert {
                q = q.MustNot(elastic.NewTermsQuery("claim_id.keyword", searchVals...))
            } else {
                q = q.Must(elastic.NewTermsQuery("claim_id.keyword", searchVals...))
            }
        }
    }

    if in.PublicKeyId != "" {
        // The public key id is base58check encoded; strip the version byte and
        // checksum to recover the 20-byte hash stored in the index.
        value := hex.EncodeToString(base58.Decode(in.PublicKeyId)[1:21])
        q = q.Must(elastic.NewTermQuery("public_key_hash.keyword", value))
    }

    if in.HasChannelSignature != nil && in.HasChannelSignature.Value {
        q = q.Must(elastic.NewExistsQuery("signature_digest"))
        if in.SignatureValid != nil {
            q = q.Must(elastic.NewTermQuery("signature_valid", in.SignatureValid.Value))
        }
    } else if in.SignatureValid != nil {
        // Unsigned claims pass; signed claims must match the requested
        // signature validity.
        q = q.MinimumNumberShouldMatch(1)
        q = q.Should(elastic.NewBoolQuery().MustNot(elastic.NewExistsQuery("signature_digest")))
        q = q.Should(elastic.NewTermQuery("signature_valid", in.SignatureValid.Value))
    }

    if in.HasSource != nil {
        // Match streams/reposts with the requested has_source value, plus
        // claim types that have no source of their own (channels, and reposts
        // of channels).
        q = q.MinimumNumberShouldMatch(1)
        isStreamOrRepost := elastic.NewTermsQuery("claim_type", claimTypes["stream"], claimTypes["repost"])
        q = q.Should(elastic.NewBoolQuery().Must(isStreamOrRepost, elastic.NewMatchQuery("has_source", in.HasSource.Value)))
        q = q.Should(elastic.NewBoolQuery().MustNot(isStreamOrRepost))
        q = q.Should(elastic.NewBoolQuery().Must(elastic.NewTermQuery("reposted_claim_type", claimTypes["channel"])))
    }

    if in.TxNout != nil {
        q = q.Must(elastic.NewTermQuery("tx_nout", in.TxNout.Value))
    }

    q = AddTermsField(in.PublicKeyHash, "public_key_hash.keyword", q)
    q = AddTermsField(in.Author, "author.keyword", q)
    q = AddTermsField(in.Title, "title.keyword", q)
    q = AddTermsField(in.CanonicalUrl, "canonical_url.keyword", q)
    q = AddTermsField(in.ClaimName, "claim_name.keyword", q)
    q = AddTermsField(in.Description, "description.keyword", q)
    q = AddTermsField(in.MediaType, "media_type.keyword", q)
    q = AddTermsField(in.Normalized, "normalized.keyword", q)
    q = AddTermsField(in.PublicKeyBytes, "public_key_bytes.keyword", q)
    q = AddTermsField(in.ShortUrl, "short_url.keyword", q)
    q = AddTermsField(in.Signature, "signature.keyword", q)
    q = AddTermsField(in.SignatureDigest, "signature_digest.keyword", q)
    q = AddTermsField(in.TxId, "tx_id.keyword", q)
    q = AddTermsField(in.FeeCurrency, "fee_currency.keyword", q)
    q = AddTermsField(in.RepostedClaimId, "reposted_claim_id.keyword", q)

    q = AddTermsField(s.cleanTags(in.AnyTags), "tags.keyword", q)
    q = AddIndividualTermFields(s.cleanTags(in.AllTags), "tags.keyword", q, false)
    q = AddIndividualTermFields(s.cleanTags(in.NotTags), "tags.keyword", q, true)
    q = AddTermsField(in.AnyLanguages, "languages", q)
    q = AddIndividualTermFields(in.AllLanguages, "languages", q, false)

    q = AddInvertibleField(in.ChannelId, "channel_id.keyword", q)
    q = AddInvertibleField(in.ChannelIds, "channel_id.keyword", q)
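    // The range filters below all share one shape: a pb.RangeField carries a
    // comparison op and value, so a hypothetical
    //   Amount: &pb.RangeField{Op: pb.RangeField_LTE, Value: []string{"1000"}}
    // becomes an "amount <= 1000" range clause via AddRangeField.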
    q = AddRangeField(in.TxPosition, "tx_position", q)
    q = AddRangeField(in.Amount, "amount", q)
    q = AddRangeField(in.Timestamp, "timestamp", q)
    q = AddRangeField(in.CreationTimestamp, "creation_timestamp", q)
    q = AddRangeField(in.Height, "height", q)
    q = AddRangeField(in.CreationHeight, "creation_height", q)
    q = AddRangeField(in.ActivationHeight, "activation_height", q)
    q = AddRangeField(in.ExpirationHeight, "expiration_height", q)
    q = AddRangeField(in.ReleaseTime, "release_time", q)
    q = AddRangeField(in.Reposted, "reposted", q)
    q = AddRangeField(in.FeeAmount, "fee_amount", q)
    q = AddRangeField(in.Duration, "duration", q)
    q = AddRangeField(in.CensorType, "censor_type", q)
    q = AddRangeField(in.ChannelJoin, "channel_join", q)
    q = AddRangeField(in.EffectiveAmount, "effective_amount", q)
    q = AddRangeField(in.SupportAmount, "support_amount", q)
    q = AddRangeField(in.TrendingGroup, "trending_group", q)
    q = AddRangeField(in.TrendingMixed, "trending_mixed", q)
    q = AddRangeField(in.TrendingLocal, "trending_local", q)
    q = AddRangeField(in.TrendingGlobal, "trending_global", q)

    if in.Text != "" {
        textQuery := elastic.NewSimpleQueryStringQuery(in.Text).
            FieldWithBoost("claim_name", 4).
            FieldWithBoost("channel_name", 8).
            FieldWithBoost("title", 1).
            FieldWithBoost("description", 0.5).
            FieldWithBoost("author", 1).
            FieldWithBoost("tags", 0.5)
        q = q.Must(textQuery)
    }

    // In dev mode, search every index except the production "claims" index.
    var searchIndices = []string{}
    if s.Args.Dev {
        indices, err := client.IndexNames()
        if err != nil {
            log.Fatalln(err)
        }
        searchIndices = make([]string, 0, len(indices))
        for _, index := range indices {
            if index == "claims" {
                continue
            }
            searchIndices = append(searchIndices, index)
        }
    }

    // Fetch up to 1000 hits in one shot; pagination, per-channel limits, and
    // deduplication all happen client side below.
    fsc := elastic.NewFetchSourceContext(true).Exclude("description", "title")
    search := client.Search().
        Index(searchIndices...).
        FetchSourceContext(fsc).
        Query(q).
        From(0).Size(1000)
    for _, x := range orderBy {
        search = search.Sort(x.Field, x.IsAsc)
    }

    searchResult, err := search.Do(ctx)
    if err != nil {
        return nil, err
    }

    log.Printf("%s: found %d results in %dms\n", in.Text, len(searchResult.Hits.Hits), searchResult.TookInMillis)

    var txos []*pb.Output
    blocked := make([]*pb.Blocked, 0)
    records := make([]*record, 0, searchResult.TotalHits())

    var r record
    for _, item := range searchResult.Each(reflect.TypeOf(r)) {
        if t, ok := item.(record); ok {
            records = append(records, &t)
        }
    }

    records = removeBlocked(records, &blocked)

    if in.RemoveDuplicates != nil {
        records = removeDuplicates(records)
    }

    if in.LimitClaimsPerChannel != nil && in.LimitClaimsPerChannel.Value > 0 {
        records = searchAhead(records, pageSize, int(in.LimitClaimsPerChannel.Value))
    }

    // Slice out the requested page.
    finalLength := int(math.Min(float64(len(records)), float64(pageSize)))
    txos = make([]*pb.Output, 0, finalLength)
    var j = 0
    for i := from; i < from+finalLength && i < len(records) && j < finalLength; i++ {
        txos = append(txos, records[i].recordToOutput())
        j += 1
    }

    if in.NoTotals != nil && in.NoTotals.Value {
        return &pb.Outputs{
            Txos:    txos,
            Offset:  uint32(int64(from) + searchResult.TotalHits()),
            Blocked: blocked,
        }, nil
    }

    var blockedTotal uint32 = 0
    for _, b := range blocked {
        blockedTotal += b.Count
    }

    return &pb.Outputs{
        Txos:         txos,
        Total:        uint32(searchResult.TotalHits()),
        Offset:       uint32(int64(from) + searchResult.TotalHits()),
        Blocked:      blocked,
        BlockedTotal: blockedTotal,
    }, nil
}

// sumCounters totals the per-channel hit counts.
func sumCounters(channelCounters map[string]int) int {
    sum := 0
    for _, v := range channelCounters {
        sum += v
    }
    return sum
}
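// exampleSearchAhead is an illustrative sketch (not called anywhere) of what
// searchAhead below does: with pageSize 2 and at most 1 claim per channel per
// page, the two "chA" hits are split across pages instead of crowding one
// page. The records here are hypothetical.
func exampleSearchAhead() {
    hits := []*record{
        {ClaimId: "a1", ChannelId: "chA"},
        {ClaimId: "a2", ChannelId: "chA"},
        {ClaimId: "b1", ChannelId: "chB"},
    }
    paged := searchAhead(hits, 2, 1)
    // Resulting order: a1, b1 (page one), then a2 (start of page two).
    for _, hit := range paged {
        log.Println(hit.ClaimId)
    }
}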
// searchAhead walks the hits in order, filling pages of pageSize results
// while allowing at most perChannelPerPage claims from the same channel on
// each page. Hits that exceed the per-channel quota are deferred to later
// pages.
func searchAhead(searchHits []*record, pageSize int, perChannelPerPage int) []*record {
    finalHits := make([]*record, 0, len(searchHits))
    channelCounters := make(map[string]int)
    nextPageHitsMaybeCheckLater := deque.New()
    searchHitsQ := deque.New()
    for _, rec := range searchHits {
        searchHitsQ.PushRight(rec)
    }
    for !searchHitsQ.Empty() || !nextPageHitsMaybeCheckLater.Empty() {
        if len(finalHits) > 0 && len(finalHits)%pageSize == 0 {
            // A new page starts: reset the per-channel quotas.
            channelCounters = make(map[string]int)
        } else if len(finalHits) != 0 {
            // The last page was incomplete and we are left with bad
            // replacements, so stop rather than pad it.
            break
        }
        // Give deferred hits first chance on the fresh page.
        for i := 0; i < nextPageHitsMaybeCheckLater.Size(); i++ {
            rec := nextPageHitsMaybeCheckLater.PopLeft().(*record)
            if perChannelPerPage > 0 && channelCounters[rec.ChannelId] < perChannelPerPage {
                finalHits = append(finalHits, rec)
                channelCounters[rec.ChannelId] = channelCounters[rec.ChannelId] + 1
            }
        }
        for !searchHitsQ.Empty() {
            hit := searchHitsQ.PopLeft().(*record)
            if hit.ChannelId == "" || perChannelPerPage < 0 {
                finalHits = append(finalHits, hit)
            } else if channelCounters[hit.ChannelId] < perChannelPerPage {
                finalHits = append(finalHits, hit)
                channelCounters[hit.ChannelId] = channelCounters[hit.ChannelId] + 1
                if len(finalHits)%pageSize == 0 {
                    break
                }
            } else {
                // Channel quota reached for this page; retry on the next one.
                nextPageHitsMaybeCheckLater.PushRight(hit)
            }
        }
    }
    return finalHits
}

// recordToOutput converts an ES hit into the protobuf output format.
func (r *record) recordToOutput() *pb.Output {
    return &pb.Output{
        TxHash: util.ToHash(r.Txid),
        Nout:   r.Nout,
        Height: r.Height,
        Meta: &pb.Output_Claim{
            Claim: &pb.ClaimMeta{
                //Channel:
                //Repost:
                ShortUrl:         r.ShortUrl,
                CanonicalUrl:     r.CanonicalUrl,
                IsControlling:    r.IsControlling,
                TakeOverHeight:   r.TakeOverHeight,
                CreationHeight:   r.CreationHeight,
                ActivationHeight: r.ActivationHeight,
                ExpirationHeight: r.ExpirationHeight,
                ClaimsInChannel:  r.ClaimsInChannel,
                Reposted:         r.Reposted,
                EffectiveAmount:  r.EffectiveAmount,
                SupportAmount:    r.SupportAmount,
                TrendingGroup:    r.TrendingGroup,
                TrendingMixed:    r.TrendingMixed,
                TrendingLocal:    r.TrendingLocal,
                TrendingGlobal:   r.TrendingGlobal,
            },
        },
    }
}

// getHitId returns the id used for deduplication: the reposted claim id for
// reposts, otherwise the claim's own id.
func (r *record) getHitId() string {
    if r.RepostedClaimId != "" {
        return r.RepostedClaimId
    }
    return r.ClaimId
}

// removeDuplicates keeps only the earliest (lowest height) version of each
// claim, treating reposts as duplicates of the claim they repost.
func removeDuplicates(searchHits []*record) []*record {
    dropped := make(map[*record]bool)
    // hit id (reposted claim id, or the claim's own id) -> the record with
    // the lowest height seen so far for that id
    knownIds := make(map[string]*record)

    for _, hit := range searchHits {
        hitId := hit.getHitId()
        if knownIds[hitId] == nil {
            knownIds[hitId] = hit
        } else {
            prevHit := knownIds[hitId]
            if hit.Height < prevHit.Height {
                knownIds[hitId] = hit
                dropped[prevHit] = true
            } else {
                dropped[hit] = true
            }
        }
    }

    deduped := make([]*record, len(searchHits)-len(dropped))
    var i = 0
    for _, hit := range searchHits {
        if !dropped[hit] {
            deduped[i] = hit
            i++
        }
    }

    return deduped
}

// removeBlocked filters out censored hits, accumulating one pb.Blocked entry
// per censoring channel with a count of how many hits it blocked.
func removeBlocked(searchHits []*record, blocked *[]*pb.Blocked) []*record {
    newHits := make([]*record, 0, len(searchHits))
    blockedChannels := make(map[string]*pb.Blocked)
    for _, r := range searchHits {
        if r.CensorType != 0 {
            if blockedChannels[r.ChannelId] == nil {
                blockedObj := &pb.Blocked{
                    Count:   1,
                    Channel: r.recordToOutput(),
                }
                *blocked = append(*blocked, blockedObj)
                blockedChannels[r.ChannelId] = blockedObj
            } else {
                blockedChannels[r.ChannelId].Count += 1
            }
        } else {
            newHits = append(newHits, r)
        }
    }
    return newHits
}
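// exampleSearchRequest builds a hypothetical request exercising several of
// the filters handled in Search above; the field values are illustrative
// only. It could be passed to Search directly or sent over the hub's gRPC
// interface.
func exampleSearchRequest() *pb.SearchRequest {
    return &pb.SearchRequest{
        Text:       "science fiction",               // full-text query with per-field boosts
        ClaimType:  []string{"stream"},               // mapped to claim_type 1
        StreamType: []string{"video"},                // mapped to stream_type 1
        AnyTags:    []string{"Sci-Fi"},               // normalized by cleanTags before querying
        OrderBy:    []string{"^height"},              // '^' prefix sorts ascending
        Limit:      &wrappers.Int32Value{Value: 20},  // page size
        Offset:     &wrappers.Int32Value{Value: 0},   // starting offset
    }
}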