A vibe coded tangled fork which supports pijul.
1// heavily inspired by gitea's model (basically copy-pasted)
2package pulls_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 tlog "tangled.org/core/log"
24)
25
const (
	// pullIndexerAnalyzer is the name under which the custom analyzer is
	// registered in the index mapping; Search passes the same name to its
	// title/body queries.
	pullIndexerAnalyzer = "pullIndexer"
	// pullIndexerDocType is the document type returned by pullData.Type and
	// the key under which the pull document mapping is registered.
	pullIndexerDocType = "pullIndexerDocType"

	// unicodeNormalizeName is the name under which the unicode normalization
	// token filter is registered in the index mapping.
	// NOTE(review): the value is spelled "uicodeNormalize"; it is only used
	// as an internal identifier here, so it is presumably a harmless typo.
	unicodeNormalizeName = "uicodeNormalize"
)
32
// Indexer is a search index for pulls backed by a bleve index stored at a
// filesystem path.
type Indexer struct {
	// indexer is the underlying bleve index. It is nil until intialize has
	// opened or created it.
	indexer bleve.Index
	// path is the location handed to bleve.Open and bleve.New.
	path string
}
37
38func NewIndexer(indexDir string) *Indexer {
39 return &Indexer{
40 path: indexDir,
41 }
42}
43
44// Init initializes the indexer
45func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
46 l := tlog.FromContext(ctx)
47 existed, err := ix.intialize(ctx)
48 if err != nil {
49 log.Fatalln("failed to initialize pull indexer", err)
50 }
51 if !existed {
52 l.Debug("Populating the pull indexer")
53 err := PopulateIndexer(ctx, ix, e)
54 if err != nil {
55 log.Fatalln("failed to populate pull indexer", err)
56 }
57 }
58
59 count, _ := ix.indexer.DocCount()
60 l.Info("Initialized the pull indexer", "docCount", count)
61}
62
63func generatePullIndexMapping() (mapping.IndexMapping, error) {
64 mapping := bleve.NewIndexMapping()
65 docMapping := bleve.NewDocumentMapping()
66
67 textFieldMapping := bleve.NewTextFieldMapping()
68 textFieldMapping.Store = false
69 textFieldMapping.IncludeInAll = false
70
71 keywordFieldMapping := bleve.NewKeywordFieldMapping()
72 keywordFieldMapping.Store = false
73 keywordFieldMapping.IncludeInAll = false
74
75 // numericFieldMapping := bleve.NewNumericFieldMapping()
76
77 docMapping.AddFieldMappingsAt("title", textFieldMapping)
78 docMapping.AddFieldMappingsAt("body", textFieldMapping)
79
80 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
81 docMapping.AddFieldMappingsAt("state", keywordFieldMapping)
82 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping)
83 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping)
84
85 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
86 "type": unicodenorm.Name,
87 "form": unicodenorm.NFC,
88 })
89 if err != nil {
90 return nil, err
91 }
92
93 err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{
94 "type": custom.Name,
95 "char_filters": []string{},
96 "tokenizer": unicode.Name,
97 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
98 })
99 if err != nil {
100 return nil, err
101 }
102
103 mapping.DefaultAnalyzer = pullIndexerAnalyzer
104 mapping.AddDocumentMapping(pullIndexerDocType, docMapping)
105 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
106 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
107
108 return mapping, nil
109}
110
111func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
112 if ix.indexer != nil {
113 return false, errors.New("indexer is already initialized")
114 }
115
116 indexer, err := openIndexer(ctx, ix.path)
117 if err != nil {
118 return false, err
119 }
120 if indexer != nil {
121 ix.indexer = indexer
122 return true, nil
123 }
124
125 mapping, err := generatePullIndexMapping()
126 if err != nil {
127 return false, err
128 }
129 indexer, err = bleve.New(ix.path, mapping)
130 if err != nil {
131 return false, err
132 }
133
134 ix.indexer = indexer
135
136 return false, nil
137}
138
139func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
140 l := tlog.FromContext(ctx)
141 indexer, err := bleve.Open(path)
142 if err != nil {
143 if errors.Is(err, upsidedown.IncompatibleVersion) {
144 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
145 return nil, os.RemoveAll(path)
146 }
147 return nil, nil
148 }
149 return indexer, nil
150}
151
152func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
153 l := tlog.FromContext(ctx)
154
155 pulls, err := db.GetPulls(e)
156 if err != nil {
157 return err
158 }
159 count := len(pulls)
160 err = ix.Index(ctx, pulls...)
161 if err != nil {
162 return err
163 }
164 l.Info("pulls indexed", "count", count)
165 return err
166}
167
// pullData is the document handed to bleve for a single pull. It is built
// from a models.Pull by makePullData.
type pullData struct {
	// ID is the pull's ID; Index base36-encodes it to form the document ID.
	ID        int64    `json:"id"`
	RepoAt    string   `json:"repo_at"`
	PullID    int      `json:"pull_id"`
	Title     string   `json:"title"`
	Body      string   `json:"body"`
	State     string   `json:"state"`
	AuthorDid string   `json:"author_did"`
	Labels    []string `json:"labels"`

	// Comments is not populated by makePullData and has no field mapping in
	// generatePullIndexMapping.
	Comments []pullCommentData `json:"comments"`
}
180
181func makePullData(pull *models.Pull) *pullData {
182 return &pullData{
183 ID: int64(pull.ID),
184 RepoAt: pull.RepoAt.String(),
185 PullID: pull.PullId,
186 Title: pull.Title,
187 Body: pull.Body,
188 State: pull.State.String(),
189 AuthorDid: pull.OwnerDid,
190 Labels: pull.Labels.LabelNames(),
191 }
192}
193
194// Type returns the document type, for bleve's mapping.Classifier interface.
195func (i *pullData) Type() string {
196 return pullIndexerDocType
197}
198
// pullCommentData is the element type of pullData.Comments; it carries only
// the comment body.
type pullCommentData struct {
	Body string `json:"body"`
}
202
// searchResult is the outcome of Indexer.Search.
type searchResult struct {
	// Hits holds the pull IDs decoded from the matching document IDs, in the
	// order bleve returned them.
	Hits []int64
	// Total is the total reported by bleve for the query.
	Total uint64
}
207
// maxBatchSize is the batch size Index passes to bleveutil.NewFlushingBatch.
// NOTE(review): presumably the number of queued operations after which the
// batch flushes itself — confirm against bleveutil.
const maxBatchSize = 20
209
210func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error {
211 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
212 for _, pull := range pulls {
213 pullData := makePullData(pull)
214 if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil {
215 return err
216 }
217 }
218 return batch.Flush()
219}
220
221func (ix *Indexer) Delete(ctx context.Context, pullID int64) error {
222 return ix.indexer.Delete(base36.Encode(pullID))
223}
224
225func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) {
226 var musts []query.Query
227 var mustNots []query.Query
228
229 // TODO(boltless): remove this after implementing pulls page pagination
230 limit := opts.Page.Limit
231 if limit == 0 {
232 limit = 500
233 }
234
235 for _, keyword := range opts.Keywords {
236 musts = append(musts, bleve.NewDisjunctionQuery(
237 bleveutil.MatchAndQuery("title", keyword, pullIndexerAnalyzer, 0),
238 bleveutil.MatchAndQuery("body", keyword, pullIndexerAnalyzer, 0),
239 ))
240 }
241
242 for _, phrase := range opts.Phrases {
243 musts = append(musts, bleve.NewDisjunctionQuery(
244 bleveutil.MatchPhraseQuery("title", phrase, pullIndexerAnalyzer),
245 bleveutil.MatchPhraseQuery("body", phrase, pullIndexerAnalyzer),
246 ))
247 }
248
249 for _, keyword := range opts.NegatedKeywords {
250 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
251 bleveutil.MatchAndQuery("title", keyword, pullIndexerAnalyzer, 0),
252 bleveutil.MatchAndQuery("body", keyword, pullIndexerAnalyzer, 0),
253 ))
254 }
255
256 for _, phrase := range opts.NegatedPhrases {
257 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
258 bleveutil.MatchPhraseQuery("title", phrase, pullIndexerAnalyzer),
259 bleveutil.MatchPhraseQuery("body", phrase, pullIndexerAnalyzer),
260 ))
261 }
262
263 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
264 if opts.State != nil {
265 musts = append(musts, bleveutil.KeywordFieldQuery("state", opts.State.String()))
266 }
267
268 if opts.AuthorDid != "" {
269 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid))
270 }
271
272 for _, label := range opts.Labels {
273 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label))
274 }
275
276 if opts.NegatedAuthorDid != "" {
277 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", opts.NegatedAuthorDid))
278 }
279
280 for _, label := range opts.NegatedLabels {
281 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label))
282 }
283
284 indexerQuery := bleve.NewBooleanQuery()
285 indexerQuery.AddMust(musts...)
286 indexerQuery.AddMustNot(mustNots...)
287 searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false)
288 res, err := ix.indexer.SearchInContext(ctx, searchReq)
289 if err != nil {
290 return nil, nil
291 }
292 ret := &searchResult{
293 Total: res.Total,
294 Hits: make([]int64, len(res.Hits)),
295 }
296 for i, hit := range res.Hits {
297 id, err := base36.Decode(hit.ID)
298 if err != nil {
299 return nil, err
300 }
301 ret.Hits[i] = id
302 }
303 return ret, nil
304}