A vibe-coded Tangled fork that supports Pijul.
1// heavily inspired by gitea's model (basically copy-pasted)
2package issues_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 "tangled.org/core/appview/pagination"
24 tlog "tangled.org/core/log"
25)
26
const (
	// issueIndexerAnalyzer is the name under which the custom analyzer is
	// registered on the index mapping; the same name is passed to the text
	// queries built in Search.
	issueIndexerAnalyzer = "issueIndexer"
	// issueIndexerDocType is the document type reported by issueData.Type and
	// the name its document mapping is registered under.
	issueIndexerDocType = "issueIndexerDocType"

	// unicodeNormalizeName is the name under which the custom unicode
	// normalization token filter is registered on the index mapping.
	// NOTE(review): the value is spelled "uicodeNormalize" (missing "n"); it
	// is only used as an identifier, but it is presumably persisted with the
	// index mapping — confirm before correcting the spelling.
	unicodeNormalizeName = "uicodeNormalize"
)
33
// Indexer is a search index over issues backed by a bleve index stored at a
// filesystem path.
type Indexer struct {
	// indexer is the underlying bleve index. It is nil until intialize has
	// opened or created the index.
	indexer bleve.Index
	// path is the location handed to bleve.Open and bleve.New.
	path string
}
38
39func NewIndexer(indexDir string) *Indexer {
40 return &Indexer{
41 path: indexDir,
42 }
43}
44
45// Init initializes the indexer
46func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
47 l := tlog.FromContext(ctx)
48 existed, err := ix.intialize(ctx)
49 if err != nil {
50 log.Fatalln("failed to initialize issue indexer", err)
51 }
52 if !existed {
53 l.Debug("Populating the issue indexer")
54 err := PopulateIndexer(ctx, ix, e)
55 if err != nil {
56 log.Fatalln("failed to populate issue indexer", err)
57 }
58 }
59
60 count, _ := ix.indexer.DocCount()
61 l.Info("Initialized the issue indexer", "docCount", count)
62}
63
64func generateIssueIndexMapping() (mapping.IndexMapping, error) {
65 mapping := bleve.NewIndexMapping()
66 docMapping := bleve.NewDocumentMapping()
67
68 textFieldMapping := bleve.NewTextFieldMapping()
69 textFieldMapping.Store = false
70 textFieldMapping.IncludeInAll = false
71
72 boolFieldMapping := bleve.NewBooleanFieldMapping()
73 boolFieldMapping.Store = false
74 boolFieldMapping.IncludeInAll = false
75
76 keywordFieldMapping := bleve.NewKeywordFieldMapping()
77 keywordFieldMapping.Store = false
78 keywordFieldMapping.IncludeInAll = false
79
80 // numericFieldMapping := bleve.NewNumericFieldMapping()
81
82 docMapping.AddFieldMappingsAt("title", textFieldMapping)
83 docMapping.AddFieldMappingsAt("body", textFieldMapping)
84
85 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
86 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
87 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping)
88 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping)
89
90 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
91 "type": unicodenorm.Name,
92 "form": unicodenorm.NFC,
93 })
94 if err != nil {
95 return nil, err
96 }
97
98 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
99 "type": custom.Name,
100 "char_filters": []string{},
101 "tokenizer": unicode.Name,
102 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
103 })
104 if err != nil {
105 return nil, err
106 }
107
108 mapping.DefaultAnalyzer = issueIndexerAnalyzer
109 mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
110 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
111 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
112
113 return mapping, nil
114}
115
116func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
117 if ix.indexer != nil {
118 return false, errors.New("indexer is already initialized")
119 }
120
121 indexer, err := openIndexer(ctx, ix.path)
122 if err != nil {
123 return false, err
124 }
125 if indexer != nil {
126 ix.indexer = indexer
127 return true, nil
128 }
129
130 mapping, err := generateIssueIndexMapping()
131 if err != nil {
132 return false, err
133 }
134 indexer, err = bleve.New(ix.path, mapping)
135 if err != nil {
136 return false, err
137 }
138
139 ix.indexer = indexer
140
141 return false, nil
142}
143
144func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
145 l := tlog.FromContext(ctx)
146 indexer, err := bleve.Open(path)
147 if err != nil {
148 if errors.Is(err, upsidedown.IncompatibleVersion) {
149 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
150 return nil, os.RemoveAll(path)
151 }
152 return nil, nil
153 }
154 return indexer, nil
155}
156
157func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
158 l := tlog.FromContext(ctx)
159 count := 0
160 err := pagination.IterateAll(
161 func(page pagination.Page) ([]models.Issue, error) {
162 return db.GetIssuesPaginated(e, page)
163 },
164 func(issues []models.Issue) error {
165 count += len(issues)
166 return ix.Index(ctx, issues...)
167 },
168 )
169 l.Info("issues indexed", "count", count)
170 return err
171}
172
// issueData is the document handed to bleve for a single issue. Its JSON tag
// names are the field names used by the index mapping and by the queries in
// Search.
type issueData struct {
	ID        int64    `json:"id"`
	RepoAt    string   `json:"repo_at"`
	IssueID   int      `json:"issue_id"`
	Title     string   `json:"title"`
	Body      string   `json:"body"`
	IsOpen    bool     `json:"is_open"`
	AuthorDid string   `json:"author_did"`
	Labels    []string `json:"labels"`

	// NOTE(review): makeIssueData leaves Comments unset and the index mapping
	// registers no "comments" field, so comments do not appear to be
	// searchable yet — confirm whether this is intentional.
	Comments []IssueCommentData `json:"comments"`
}
185
186func makeIssueData(issue *models.Issue) *issueData {
187 return &issueData{
188 ID: issue.Id,
189 RepoAt: issue.RepoAt.String(),
190 IssueID: issue.IssueId,
191 Title: issue.Title,
192 Body: issue.Body,
193 IsOpen: issue.Open,
194 AuthorDid: issue.Did,
195 Labels: issue.Labels.LabelNames(),
196 }
197}
198
199// Type returns the document type, for bleve's mapping.Classifier interface.
200func (i *issueData) Type() string {
201 return issueIndexerDocType
202}
203
// IssueCommentData is the element type of issueData.Comments; it carries a
// single Body string serialized as "body".
type IssueCommentData struct {
	Body string `json:"body"`
}
207
// SearchResult is the value returned by Indexer.Search.
type SearchResult struct {
	// Hits holds the issue IDs decoded from the IDs of the returned hits, in
	// the order bleve returned them.
	Hits []int64
	// Total is the Total value reported by bleve for the search.
	Total uint64
}
212
// maxBatchSize is the size Index passes to bleveutil.NewFlushingBatch.
// NOTE(review): presumably the number of queued operations after which the
// batch flushes itself — confirm against bleveutil.
const maxBatchSize = 20
214
215func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error {
216 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
217 for _, issue := range issues {
218 issueData := makeIssueData(&issue)
219 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil {
220 return err
221 }
222 }
223 return batch.Flush()
224}
225
226func (ix *Indexer) Delete(ctx context.Context, issueId int64) error {
227 return ix.indexer.Delete(base36.Encode(issueId))
228}
229
230func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) {
231 var musts []query.Query
232 var mustNots []query.Query
233
234 for _, keyword := range opts.Keywords {
235 musts = append(musts, bleve.NewDisjunctionQuery(
236 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0),
237 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0),
238 ))
239 }
240
241 for _, phrase := range opts.Phrases {
242 musts = append(musts, bleve.NewDisjunctionQuery(
243 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer),
244 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer),
245 ))
246 }
247
248 for _, keyword := range opts.NegatedKeywords {
249 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
250 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0),
251 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0),
252 ))
253 }
254
255 for _, phrase := range opts.NegatedPhrases {
256 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
257 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer),
258 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer),
259 ))
260 }
261
262 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
263 if opts.IsOpen != nil {
264 musts = append(musts, bleveutil.BoolFieldQuery("is_open", *opts.IsOpen))
265 }
266
267 if opts.AuthorDid != "" {
268 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid))
269 }
270
271 for _, label := range opts.Labels {
272 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label))
273 }
274
275 if opts.NegatedAuthorDid != "" {
276 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", opts.NegatedAuthorDid))
277 }
278
279 for _, label := range opts.NegatedLabels {
280 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label))
281 }
282
283 indexerQuery := bleve.NewBooleanQuery()
284 indexerQuery.AddMust(musts...)
285 indexerQuery.AddMustNot(mustNots...)
286 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false)
287 res, err := ix.indexer.SearchInContext(ctx, searchReq)
288 if err != nil {
289 return nil, nil
290 }
291 ret := &SearchResult{
292 Total: res.Total,
293 Hits: make([]int64, len(res.Hits)),
294 }
295 for i, hit := range res.Hits {
296 id, err := base36.Decode(hit.ID)
297 if err != nil {
298 return nil, err
299 }
300 ret.Hits[i] = id
301 }
302 return ret, nil
303}