A vibe-coded tangled fork that supports pijul.
at 1f5feacc4fd25142d88abe3623ece012f1320427 304 lines 8.2 kB view raw
1// heavily inspired by gitea's model (basically copy-pasted) 2package pulls_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 tlog "tangled.org/core/log" 24) 25 26const ( 27 pullIndexerAnalyzer = "pullIndexer" 28 pullIndexerDocType = "pullIndexerDocType" 29 30 unicodeNormalizeName = "uicodeNormalize" 31) 32 33type Indexer struct { 34 indexer bleve.Index 35 path string 36} 37 38func NewIndexer(indexDir string) *Indexer { 39 return &Indexer{ 40 path: indexDir, 41 } 42} 43 44// Init initializes the indexer 45func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 46 l := tlog.FromContext(ctx) 47 existed, err := ix.intialize(ctx) 48 if err != nil { 49 log.Fatalln("failed to initialize pull indexer", err) 50 } 51 if !existed { 52 l.Debug("Populating the pull indexer") 53 err := PopulateIndexer(ctx, ix, e) 54 if err != nil { 55 log.Fatalln("failed to populate pull indexer", err) 56 } 57 } 58 59 count, _ := ix.indexer.DocCount() 60 l.Info("Initialized the pull indexer", "docCount", count) 61} 62 63func generatePullIndexMapping() (mapping.IndexMapping, error) { 64 mapping := bleve.NewIndexMapping() 65 docMapping := bleve.NewDocumentMapping() 66 67 textFieldMapping := bleve.NewTextFieldMapping() 68 textFieldMapping.Store = false 69 textFieldMapping.IncludeInAll = false 70 71 keywordFieldMapping := 
bleve.NewKeywordFieldMapping() 72 keywordFieldMapping.Store = false 73 keywordFieldMapping.IncludeInAll = false 74 75 // numericFieldMapping := bleve.NewNumericFieldMapping() 76 77 docMapping.AddFieldMappingsAt("title", textFieldMapping) 78 docMapping.AddFieldMappingsAt("body", textFieldMapping) 79 80 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 81 docMapping.AddFieldMappingsAt("state", keywordFieldMapping) 82 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping) 83 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping) 84 85 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 86 "type": unicodenorm.Name, 87 "form": unicodenorm.NFC, 88 }) 89 if err != nil { 90 return nil, err 91 } 92 93 err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{ 94 "type": custom.Name, 95 "char_filters": []string{}, 96 "tokenizer": unicode.Name, 97 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 98 }) 99 if err != nil { 100 return nil, err 101 } 102 103 mapping.DefaultAnalyzer = pullIndexerAnalyzer 104 mapping.AddDocumentMapping(pullIndexerDocType, docMapping) 105 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 106 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 107 108 return mapping, nil 109} 110 111func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 112 if ix.indexer != nil { 113 return false, errors.New("indexer is already initialized") 114 } 115 116 indexer, err := openIndexer(ctx, ix.path) 117 if err != nil { 118 return false, err 119 } 120 if indexer != nil { 121 ix.indexer = indexer 122 return true, nil 123 } 124 125 mapping, err := generatePullIndexMapping() 126 if err != nil { 127 return false, err 128 } 129 indexer, err = bleve.New(ix.path, mapping) 130 if err != nil { 131 return false, err 132 } 133 134 ix.indexer = indexer 135 136 return false, nil 137} 138 139func openIndexer(ctx context.Context, path string) (bleve.Index, 
error) { 140 l := tlog.FromContext(ctx) 141 indexer, err := bleve.Open(path) 142 if err != nil { 143 if errors.Is(err, upsidedown.IncompatibleVersion) { 144 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 145 return nil, os.RemoveAll(path) 146 } 147 return nil, nil 148 } 149 return indexer, nil 150} 151 152func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 153 l := tlog.FromContext(ctx) 154 155 pulls, err := db.GetPulls(e) 156 if err != nil { 157 return err 158 } 159 count := len(pulls) 160 err = ix.Index(ctx, pulls...) 161 if err != nil { 162 return err 163 } 164 l.Info("pulls indexed", "count", count) 165 return err 166} 167 168type pullData struct { 169 ID int64 `json:"id"` 170 RepoAt string `json:"repo_at"` 171 PullID int `json:"pull_id"` 172 Title string `json:"title"` 173 Body string `json:"body"` 174 State string `json:"state"` 175 AuthorDid string `json:"author_did"` 176 Labels []string `json:"labels"` 177 178 Comments []pullCommentData `json:"comments"` 179} 180 181func makePullData(pull *models.Pull) *pullData { 182 return &pullData{ 183 ID: int64(pull.ID), 184 RepoAt: pull.RepoAt.String(), 185 PullID: pull.PullId, 186 Title: pull.Title, 187 Body: pull.Body, 188 State: pull.State.String(), 189 AuthorDid: pull.OwnerDid, 190 Labels: pull.Labels.LabelNames(), 191 } 192} 193 194// Type returns the document type, for bleve's mapping.Classifier interface. 
195func (i *pullData) Type() string { 196 return pullIndexerDocType 197} 198 199type pullCommentData struct { 200 Body string `json:"body"` 201} 202 203type searchResult struct { 204 Hits []int64 205 Total uint64 206} 207 208const maxBatchSize = 20 209 210func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error { 211 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 212 for _, pull := range pulls { 213 pullData := makePullData(pull) 214 if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil { 215 return err 216 } 217 } 218 return batch.Flush() 219} 220 221func (ix *Indexer) Delete(ctx context.Context, pullID int64) error { 222 return ix.indexer.Delete(base36.Encode(pullID)) 223} 224 225func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) { 226 var musts []query.Query 227 var mustNots []query.Query 228 229 // TODO(boltless): remove this after implementing pulls page pagination 230 limit := opts.Page.Limit 231 if limit == 0 { 232 limit = 500 233 } 234 235 for _, keyword := range opts.Keywords { 236 musts = append(musts, bleve.NewDisjunctionQuery( 237 bleveutil.MatchAndQuery("title", keyword, pullIndexerAnalyzer, 0), 238 bleveutil.MatchAndQuery("body", keyword, pullIndexerAnalyzer, 0), 239 )) 240 } 241 242 for _, phrase := range opts.Phrases { 243 musts = append(musts, bleve.NewDisjunctionQuery( 244 bleveutil.MatchPhraseQuery("title", phrase, pullIndexerAnalyzer), 245 bleveutil.MatchPhraseQuery("body", phrase, pullIndexerAnalyzer), 246 )) 247 } 248 249 for _, keyword := range opts.NegatedKeywords { 250 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 251 bleveutil.MatchAndQuery("title", keyword, pullIndexerAnalyzer, 0), 252 bleveutil.MatchAndQuery("body", keyword, pullIndexerAnalyzer, 0), 253 )) 254 } 255 256 for _, phrase := range opts.NegatedPhrases { 257 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 258 bleveutil.MatchPhraseQuery("title", phrase, 
pullIndexerAnalyzer), 259 bleveutil.MatchPhraseQuery("body", phrase, pullIndexerAnalyzer), 260 )) 261 } 262 263 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 264 if opts.State != nil { 265 musts = append(musts, bleveutil.KeywordFieldQuery("state", opts.State.String())) 266 } 267 268 if opts.AuthorDid != "" { 269 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid)) 270 } 271 272 for _, label := range opts.Labels { 273 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label)) 274 } 275 276 if opts.NegatedAuthorDid != "" { 277 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", opts.NegatedAuthorDid)) 278 } 279 280 for _, label := range opts.NegatedLabels { 281 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label)) 282 } 283 284 indexerQuery := bleve.NewBooleanQuery() 285 indexerQuery.AddMust(musts...) 286 indexerQuery.AddMustNot(mustNots...) 287 searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false) 288 res, err := ix.indexer.SearchInContext(ctx, searchReq) 289 if err != nil { 290 return nil, nil 291 } 292 ret := &searchResult{ 293 Total: res.Total, 294 Hits: make([]int64, len(res.Hits)), 295 } 296 for i, hit := range res.Hits { 297 id, err := base36.Decode(hit.ID) 298 if err != nil { 299 return nil, err 300 } 301 ret.Hits[i] = id 302 } 303 return ret, nil 304}