yp05327
70b7df0e5e
Close #278 Close #24076 ## Solutions: - Use [google/licenseclassifier](https://github.com/google/licenseclassifier/) Test result between [google/licensecheck](https://github.com/google/licensecheck) and [go-license-detector](https://github.com/go-enry/go-license-detector): https://github.com/go-gitea/gitea/pull/24872#issuecomment-1560361167 Test result between [google/licensecheck](https://github.com/google/licensecheck) and [google/licenseclassifier](https://github.com/google/licenseclassifier/): https://github.com/go-gitea/gitea/pull/24872#issuecomment-1576092178 - Generate License Convert Name List to avoid import license templates with same contents Gitea automatically get latest license data from[ spdx/license-list-data](https://github.com/spdx/license-list-data). But unfortunately, some license templates have same contents. #20915 [click here to see the list](https://github.com/go-gitea/gitea/pull/24872#issuecomment-1584141684) So we will generate a list of these license templates with same contents and create a new file to save the result when using `make generate-license`. (Need to decide the save path) - Save License info into a new table `repo_license` Can easily support searching repo by license in the future. ## Screen shot Single License: ![image](https://github.com/go-gitea/gitea/assets/18380374/41260bd7-0b4c-4038-8592-508706cffa9f) Multiple Licenses: ![image](https://github.com/go-gitea/gitea/assets/18380374/34ce2f73-7e18-446b-9b96-ecc4fb61bd70) Triggers: - [x] Push commit to default branch - [x] Create repo - [x] Mirror repo - [x] When Default Branch is changed, licenses should be updated Todo: - [x] Save Licenses info in to DB when there's a change to license file in the commit - [x] DB Migration - [x] A nominal test? 
- [x] Select which library to use(https://github.com/go-gitea/gitea/pull/24872#issuecomment-1560361167) - [x] API Support - [x] Add repo license table - ~Select license in settings if there are several licenses(Not recommended)~ - License board(later, not in this PR) ![image](https://github.com/go-gitea/gitea/assets/18380374/2c3c3bf8-bcc2-4c6d-8ce0-81d1a9733878) --------- Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com> Co-authored-by: Denys Konovalov <kontakt@denyskon.de> Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: KN4CK3R <admin@oldschoolhack.me> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: 6543 <m.huber@kithara.com> Co-authored-by: a1012112796 <1012112796@qq.com> Co-authored-by: techknowlogick <techknowlogick@gitea.com>
206 lines
4.9 KiB
Go
206 lines
4.9 KiB
Go
// Copyright 2024 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package repository
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
|
|
"code.gitea.io/gitea/models/db"
|
|
repo_model "code.gitea.io/gitea/models/repo"
|
|
"code.gitea.io/gitea/modules/container"
|
|
"code.gitea.io/gitea/modules/git"
|
|
"code.gitea.io/gitea/modules/gitrepo"
|
|
"code.gitea.io/gitea/modules/graceful"
|
|
"code.gitea.io/gitea/modules/json"
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/options"
|
|
"code.gitea.io/gitea/modules/queue"
|
|
|
|
licenseclassifier "github.com/google/licenseclassifier/v2"
|
|
)
|
|
|
|
var (
|
|
classifier *licenseclassifier.Classifier
|
|
LicenseFileName = "LICENSE"
|
|
licenseAliases map[string]string
|
|
|
|
// licenseUpdaterQueue represents a queue to handle update repo licenses
|
|
licenseUpdaterQueue *queue.WorkerPoolQueue[*LicenseUpdaterOptions]
|
|
)
|
|
|
|
func AddRepoToLicenseUpdaterQueue(opts *LicenseUpdaterOptions) error {
|
|
if opts == nil {
|
|
return nil
|
|
}
|
|
return licenseUpdaterQueue.Push(opts)
|
|
}
|
|
|
|
func loadLicenseAliases() error {
|
|
if licenseAliases != nil {
|
|
return nil
|
|
}
|
|
|
|
data, err := options.AssetFS().ReadFile("license", "etc", "license-aliases.json")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = json.Unmarshal(data, &licenseAliases)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ConvertLicenseName(name string) string {
|
|
if err := loadLicenseAliases(); err != nil {
|
|
return name
|
|
}
|
|
|
|
v, ok := licenseAliases[name]
|
|
if ok {
|
|
return v
|
|
}
|
|
return name
|
|
}
|
|
|
|
func InitLicenseClassifier() error {
|
|
// threshold should be 0.84~0.86 or the test will be failed
|
|
classifier = licenseclassifier.NewClassifier(.85)
|
|
licenseFiles, err := options.AssetFS().ListFiles("license", true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
existLicense := make(container.Set[string])
|
|
if len(licenseFiles) > 0 {
|
|
for _, licenseFile := range licenseFiles {
|
|
licenseName := ConvertLicenseName(licenseFile)
|
|
if existLicense.Contains(licenseName) {
|
|
continue
|
|
}
|
|
existLicense.Add(licenseName)
|
|
data, err := options.License(licenseFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
classifier.AddContent("License", licenseFile, licenseName, data)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// LicenseUpdaterOptions is the item type carried by the license updater queue.
type LicenseUpdaterOptions struct {
	// RepoID is the ID of the repository whose licenses should be updated.
	RepoID int64
}
|
|
|
|
func repoLicenseUpdater(items ...*LicenseUpdaterOptions) []*LicenseUpdaterOptions {
|
|
ctx := graceful.GetManager().ShutdownContext()
|
|
|
|
for _, opts := range items {
|
|
repo, err := repo_model.GetRepositoryByID(ctx, opts.RepoID)
|
|
if err != nil {
|
|
log.Error("repoLicenseUpdater [%d] failed: GetRepositoryByID: %v", opts.RepoID, err)
|
|
continue
|
|
}
|
|
if repo.IsEmpty {
|
|
continue
|
|
}
|
|
|
|
gitRepo, err := gitrepo.OpenRepository(ctx, repo)
|
|
if err != nil {
|
|
log.Error("repoLicenseUpdater [%d] failed: OpenRepository: %v", opts.RepoID, err)
|
|
continue
|
|
}
|
|
defer gitRepo.Close()
|
|
|
|
commit, err := gitRepo.GetBranchCommit(repo.DefaultBranch)
|
|
if err != nil {
|
|
log.Error("repoLicenseUpdater [%d] failed: GetBranchCommit: %v", opts.RepoID, err)
|
|
continue
|
|
}
|
|
if err = UpdateRepoLicenses(ctx, repo, commit); err != nil {
|
|
log.Error("repoLicenseUpdater [%d] failed: updateRepoLicenses: %v", opts.RepoID, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func SyncRepoLicenses(ctx context.Context) error {
|
|
log.Trace("Doing: SyncRepoLicenses")
|
|
|
|
if err := db.Iterate(
|
|
ctx,
|
|
nil,
|
|
func(ctx context.Context, repo *repo_model.Repository) error {
|
|
select {
|
|
case <-ctx.Done():
|
|
return db.ErrCancelledf("before sync repo licenses for %s", repo.FullName())
|
|
default:
|
|
}
|
|
return AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{RepoID: repo.ID})
|
|
},
|
|
); err != nil {
|
|
log.Trace("Error: SyncRepoLicenses: %v", err)
|
|
return err
|
|
}
|
|
|
|
log.Trace("Finished: SyncReposLicenses")
|
|
return nil
|
|
}
|
|
|
|
// UpdateRepoLicenses will update repository licenses col if license file exists
|
|
func UpdateRepoLicenses(ctx context.Context, repo *repo_model.Repository, commit *git.Commit) error {
|
|
if commit == nil {
|
|
return nil
|
|
}
|
|
|
|
b, err := commit.GetBlobByPath(LicenseFileName)
|
|
if err != nil && !git.IsErrNotExist(err) {
|
|
return fmt.Errorf("GetBlobByPath: %w", err)
|
|
}
|
|
|
|
if git.IsErrNotExist(err) {
|
|
return repo_model.CleanRepoLicenses(ctx, repo)
|
|
}
|
|
|
|
licenses := make([]string, 0)
|
|
if b != nil {
|
|
r, err := b.DataAsync()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer r.Close()
|
|
|
|
licenses, err = detectLicense(r)
|
|
if err != nil {
|
|
return fmt.Errorf("detectLicense: %w", err)
|
|
}
|
|
}
|
|
return repo_model.UpdateRepoLicenses(ctx, repo, commit.ID.String(), licenses)
|
|
}
|
|
|
|
// detectLicense returns the licenses detected by the given content buff
|
|
func detectLicense(r io.Reader) ([]string, error) {
|
|
if r == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
matches, err := classifier.MatchFrom(r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(matches.Matches) > 0 {
|
|
results := make(container.Set[string], len(matches.Matches))
|
|
for _, r := range matches.Matches {
|
|
if r.MatchType == "License" && !results.Contains(r.Variant) {
|
|
results.Add(r.Variant)
|
|
}
|
|
}
|
|
return results.Values(), nil
|
|
}
|
|
return nil, nil
|
|
}
|