A vibe-coded tangled fork that supports pijul.
at 1f5feacc4fd25142d88abe3623ece012f1320427 303 lines 8.4 kB view raw
1// heavily inspired by gitea's model (basically copy-pasted) 2package issues_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 "tangled.org/core/appview/pagination" 24 tlog "tangled.org/core/log" 25) 26 27const ( 28 issueIndexerAnalyzer = "issueIndexer" 29 issueIndexerDocType = "issueIndexerDocType" 30 31 unicodeNormalizeName = "uicodeNormalize" 32) 33 34type Indexer struct { 35 indexer bleve.Index 36 path string 37} 38 39func NewIndexer(indexDir string) *Indexer { 40 return &Indexer{ 41 path: indexDir, 42 } 43} 44 45// Init initializes the indexer 46func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 47 l := tlog.FromContext(ctx) 48 existed, err := ix.intialize(ctx) 49 if err != nil { 50 log.Fatalln("failed to initialize issue indexer", err) 51 } 52 if !existed { 53 l.Debug("Populating the issue indexer") 54 err := PopulateIndexer(ctx, ix, e) 55 if err != nil { 56 log.Fatalln("failed to populate issue indexer", err) 57 } 58 } 59 60 count, _ := ix.indexer.DocCount() 61 l.Info("Initialized the issue indexer", "docCount", count) 62} 63 64func generateIssueIndexMapping() (mapping.IndexMapping, error) { 65 mapping := bleve.NewIndexMapping() 66 docMapping := bleve.NewDocumentMapping() 67 68 textFieldMapping := bleve.NewTextFieldMapping() 69 textFieldMapping.Store = false 70 
textFieldMapping.IncludeInAll = false 71 72 boolFieldMapping := bleve.NewBooleanFieldMapping() 73 boolFieldMapping.Store = false 74 boolFieldMapping.IncludeInAll = false 75 76 keywordFieldMapping := bleve.NewKeywordFieldMapping() 77 keywordFieldMapping.Store = false 78 keywordFieldMapping.IncludeInAll = false 79 80 // numericFieldMapping := bleve.NewNumericFieldMapping() 81 82 docMapping.AddFieldMappingsAt("title", textFieldMapping) 83 docMapping.AddFieldMappingsAt("body", textFieldMapping) 84 85 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 86 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 87 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping) 88 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping) 89 90 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 91 "type": unicodenorm.Name, 92 "form": unicodenorm.NFC, 93 }) 94 if err != nil { 95 return nil, err 96 } 97 98 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 99 "type": custom.Name, 100 "char_filters": []string{}, 101 "tokenizer": unicode.Name, 102 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 103 }) 104 if err != nil { 105 return nil, err 106 } 107 108 mapping.DefaultAnalyzer = issueIndexerAnalyzer 109 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 110 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 111 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 112 113 return mapping, nil 114} 115 116func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 117 if ix.indexer != nil { 118 return false, errors.New("indexer is already initialized") 119 } 120 121 indexer, err := openIndexer(ctx, ix.path) 122 if err != nil { 123 return false, err 124 } 125 if indexer != nil { 126 ix.indexer = indexer 127 return true, nil 128 } 129 130 mapping, err := generateIssueIndexMapping() 131 if err != nil { 132 return false, err 133 } 134 indexer, err = 
bleve.New(ix.path, mapping) 135 if err != nil { 136 return false, err 137 } 138 139 ix.indexer = indexer 140 141 return false, nil 142} 143 144func openIndexer(ctx context.Context, path string) (bleve.Index, error) { 145 l := tlog.FromContext(ctx) 146 indexer, err := bleve.Open(path) 147 if err != nil { 148 if errors.Is(err, upsidedown.IncompatibleVersion) { 149 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 150 return nil, os.RemoveAll(path) 151 } 152 return nil, nil 153 } 154 return indexer, nil 155} 156 157func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 158 l := tlog.FromContext(ctx) 159 count := 0 160 err := pagination.IterateAll( 161 func(page pagination.Page) ([]models.Issue, error) { 162 return db.GetIssuesPaginated(e, page) 163 }, 164 func(issues []models.Issue) error { 165 count += len(issues) 166 return ix.Index(ctx, issues...) 167 }, 168 ) 169 l.Info("issues indexed", "count", count) 170 return err 171} 172 173type issueData struct { 174 ID int64 `json:"id"` 175 RepoAt string `json:"repo_at"` 176 IssueID int `json:"issue_id"` 177 Title string `json:"title"` 178 Body string `json:"body"` 179 IsOpen bool `json:"is_open"` 180 AuthorDid string `json:"author_did"` 181 Labels []string `json:"labels"` 182 183 Comments []IssueCommentData `json:"comments"` 184} 185 186func makeIssueData(issue *models.Issue) *issueData { 187 return &issueData{ 188 ID: issue.Id, 189 RepoAt: issue.RepoAt.String(), 190 IssueID: issue.IssueId, 191 Title: issue.Title, 192 Body: issue.Body, 193 IsOpen: issue.Open, 194 AuthorDid: issue.Did, 195 Labels: issue.Labels.LabelNames(), 196 } 197} 198 199// Type returns the document type, for bleve's mapping.Classifier interface. 
200func (i *issueData) Type() string { 201 return issueIndexerDocType 202} 203 204type IssueCommentData struct { 205 Body string `json:"body"` 206} 207 208type SearchResult struct { 209 Hits []int64 210 Total uint64 211} 212 213const maxBatchSize = 20 214 215func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error { 216 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 217 for _, issue := range issues { 218 issueData := makeIssueData(&issue) 219 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil { 220 return err 221 } 222 } 223 return batch.Flush() 224} 225 226func (ix *Indexer) Delete(ctx context.Context, issueId int64) error { 227 return ix.indexer.Delete(base36.Encode(issueId)) 228} 229 230func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) { 231 var musts []query.Query 232 var mustNots []query.Query 233 234 for _, keyword := range opts.Keywords { 235 musts = append(musts, bleve.NewDisjunctionQuery( 236 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0), 237 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0), 238 )) 239 } 240 241 for _, phrase := range opts.Phrases { 242 musts = append(musts, bleve.NewDisjunctionQuery( 243 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer), 244 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer), 245 )) 246 } 247 248 for _, keyword := range opts.NegatedKeywords { 249 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 250 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0), 251 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0), 252 )) 253 } 254 255 for _, phrase := range opts.NegatedPhrases { 256 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 257 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer), 258 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer), 259 )) 260 } 261 262 musts = append(musts, 
bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 263 if opts.IsOpen != nil { 264 musts = append(musts, bleveutil.BoolFieldQuery("is_open", *opts.IsOpen)) 265 } 266 267 if opts.AuthorDid != "" { 268 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid)) 269 } 270 271 for _, label := range opts.Labels { 272 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label)) 273 } 274 275 if opts.NegatedAuthorDid != "" { 276 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", opts.NegatedAuthorDid)) 277 } 278 279 for _, label := range opts.NegatedLabels { 280 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label)) 281 } 282 283 indexerQuery := bleve.NewBooleanQuery() 284 indexerQuery.AddMust(musts...) 285 indexerQuery.AddMustNot(mustNots...) 286 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 287 res, err := ix.indexer.SearchInContext(ctx, searchReq) 288 if err != nil { 289 return nil, nil 290 } 291 ret := &SearchResult{ 292 Total: res.Total, 293 Hits: make([]int64, len(res.Hits)), 294 } 295 for i, hit := range res.Hits { 296 id, err := base36.Decode(hit.ID) 297 if err != nil { 298 return nil, err 299 } 300 ret.Hits[i] = id 301 } 302 return ret, nil 303}