* Update go-enry to v2.5.2
This commit is contained in:
2
go.mod
2
go.mod
@ -37,7 +37,7 @@ require (
|
||||
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
|
||||
github.com/gliderlabs/ssh v0.2.2
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
|
||||
github.com/go-enry/go-enry/v2 v2.3.0
|
||||
github.com/go-enry/go-enry/v2 v2.5.2
|
||||
github.com/go-git/go-billy/v5 v5.0.0
|
||||
github.com/go-git/go-git/v5 v5.0.0
|
||||
github.com/go-openapi/jsonreference v0.19.3 // indirect
|
||||
|
12
go.sum
12
go.sum
@ -193,10 +193,10 @@ github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqo
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
|
||||
github.com/go-enry/go-enry/v2 v2.3.0 h1:o8KwgY6uSplysrIpj+Y42J/xGPp90ogVpxE2Z3s8Unk=
|
||||
github.com/go-enry/go-enry/v2 v2.3.0/go.mod h1:+xFJwbqWi15bvqFHb2ELUWVRKFQtwB61+sDrkvvxxGI=
|
||||
github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs=
|
||||
github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||
github.com/go-enry/go-enry/v2 v2.5.2 h1:3f3PFAO6JitWkPi1GQ5/m6Xu4gNL1U5soJ8QaYqJ0YQ=
|
||||
github.com/go-enry/go-enry/v2 v2.5.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ=
|
||||
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
|
||||
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||
github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4=
|
||||
github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E=
|
||||
github.com/go-git/go-billy/v5 v5.0.0 h1:7NQHvd9FVid8VL4qVUMm8XifBK+2xCoZ2lSk0agRrHM=
|
||||
@ -616,8 +616,6 @@ github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDW
|
||||
github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ=
|
||||
github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk=
|
||||
github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM=
|
||||
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
|
||||
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
|
||||
github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ=
|
||||
@ -876,8 +874,6 @@ gopkg.in/testfixtures.v2 v2.5.0 h1:N08B7l2GzFQenyYbzqthDnKAA+cmb17iAZhhFxr7JHw=
|
||||
gopkg.in/testfixtures.v2 v2.5.0/go.mod h1:vyAq+MYCgNpR29qitQdLZhdbLFf4mR/2MFJRFoQZZ2M=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE=
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew=
|
||||
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
|
||||
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
|
||||
gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
|
||||
|
@ -10,8 +10,8 @@ import (
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
)
|
||||
|
||||
// GetCodeLanguageWithCallback detects code language based on file name and content using callback
|
||||
func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, error)) string {
|
||||
// GetCodeLanguage detects code language based on file name and content
|
||||
func GetCodeLanguage(filename string, content []byte) string {
|
||||
if language, ok := enry.GetLanguageByExtension(filename); ok {
|
||||
return language
|
||||
}
|
||||
@ -20,17 +20,9 @@ func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, er
|
||||
return language
|
||||
}
|
||||
|
||||
content, err := contentFunc()
|
||||
if err != nil {
|
||||
if len(content) == 0 {
|
||||
return enry.OtherLanguage
|
||||
}
|
||||
|
||||
return enry.GetLanguage(filepath.Base(filename), content)
|
||||
}
|
||||
|
||||
// GetCodeLanguage detects code language based on file name and content
|
||||
func GetCodeLanguage(filename string, content []byte) string {
|
||||
return GetCodeLanguageWithCallback(filename, func() ([]byte, error) {
|
||||
return content, nil
|
||||
})
|
||||
}
|
||||
|
@ -50,11 +50,15 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
|
||||
return nil
|
||||
}
|
||||
|
||||
// If content can not be read just do detection by filename
|
||||
content, _ := readFile(f, fileSizeLimit)
|
||||
if enry.IsGenerated(f.Name, content) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Use .gitattributes file for linguist overrides
|
||||
|
||||
language := analyze.GetCodeLanguageWithCallback(f.Name, func() ([]byte, error) {
|
||||
return readFile(f, fileSizeLimit)
|
||||
})
|
||||
language := analyze.GetCodeLanguage(f.Name, content)
|
||||
if language == enry.OtherLanguage || language == "" {
|
||||
return nil
|
||||
}
|
||||
|
174
vendor/github.com/go-enry/go-enry/v2/README.md
generated
vendored
174
vendor/github.com/go-enry/go-enry/v2/README.md
generated
vendored
@ -1,26 +1,26 @@
|
||||
# go-enry [](https://pkg.go.dev/github.com/go-enry/go-enry/v2) [](https://github.com/go-enry/go-enry/actions?query=workflow%3ATest+branch%3Amaster) [](https://codecov.io/gh/go-enry/go-enry)
|
||||
|
||||
Programming language detector and toolbox to ignore binary or vendored files. *enry*, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved *2x performance*.
|
||||
Programming language detector and toolbox to ignore binary or vendored files. _enry_, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved _2x performance_.
|
||||
|
||||
* [CLI](#cli)
|
||||
* [Library](#library)
|
||||
* [Use cases](#use-cases)
|
||||
* [By filename](#by-filename)
|
||||
* [By text](#by-text)
|
||||
* [By file](#by-file)
|
||||
* [Filtering](#filtering-vendoring-binaries-etc)
|
||||
* [Coloring](#language-colors-and-groups)
|
||||
* [Languages](#languages)
|
||||
* [Go](#go)
|
||||
* [Java bindings](#java-bindings)
|
||||
* [Python bindings](#python-bindings)
|
||||
* [Divergences from linguist](#divergences-from-linguist)
|
||||
* [Benchmarks](#benchmarks)
|
||||
* [Why Enry?](#why-enry)
|
||||
* [Development](#development)
|
||||
* [Sync with github/linguist upstream](#sync-with-githublinguist-upstream)
|
||||
* [Misc](#misc)
|
||||
* [License](#license)
|
||||
- [CLI](#cli)
|
||||
- [Library](#library)
|
||||
- [Use cases](#use-cases)
|
||||
- [By filename](#by-filename)
|
||||
- [By text](#by-text)
|
||||
- [By file](#by-file)
|
||||
- [Filtering](#filtering-vendoring-binaries-etc)
|
||||
- [Coloring](#language-colors-and-groups)
|
||||
- [Languages](#languages)
|
||||
- [Go](#go)
|
||||
- [Java bindings](#java-bindings)
|
||||
- [Python bindings](#python-bindings)
|
||||
- [Divergences from linguist](#divergences-from-linguist)
|
||||
- [Benchmarks](#benchmarks)
|
||||
- [Why Enry?](#why-enry)
|
||||
- [Development](#development)
|
||||
- [Sync with github/linguist upstream](#sync-with-githublinguist-upstream)
|
||||
- [Misc](#misc)
|
||||
- [License](#license)
|
||||
|
||||
# CLI
|
||||
|
||||
@ -28,50 +28,62 @@ The CLI binary is hosted in a separate repository [go-enry/enry](https://github.
|
||||
|
||||
# Library
|
||||
|
||||
*enry* is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments.
|
||||
_enry_ is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments.
|
||||
|
||||
## Use cases
|
||||
|
||||
*enry* guesses a programming language using a sequence of matching *strategies* that are
|
||||
applied progressively to narrow down the possible options. Each *strategy* varies on the type
|
||||
_enry_ guesses a programming language using a sequence of matching _strategies_ that are
|
||||
applied progressively to narrow down the possible options. Each _strategy_ varies on the type
|
||||
of input data that it needs to make a decision: file name, extension, the first line of the file, the full content of the file, etc.
|
||||
|
||||
Depending on available input data, enry API can be roughly divided into the next categories or use cases.
|
||||
|
||||
### By filename
|
||||
Next functions require only a name of the file to make a guess:
|
||||
- `GetLanguageByExtension` uses only file extension (which may be ambiguous)
|
||||
- `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc
|
||||
- all [filtering helpers](#filtering)
|
||||
|
||||
Please note that such guesses are expected not to be very accurate.
|
||||
Next functions require only a name of the file to make a guess:
|
||||
|
||||
- `GetLanguageByExtension` uses only file extension (which may be ambiguous)
|
||||
- `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc
|
||||
- all [filtering helpers](#filtering)
|
||||
|
||||
Please note that such guesses are expected not to be very accurate.
|
||||
|
||||
### By text
|
||||
|
||||
To make a guess only based on the content of the file or a text snippet, use
|
||||
- `GetLanguageByShebang` reads only the first line of text to identify the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)).
|
||||
- `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text.
|
||||
- `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist.
|
||||
|
||||
- `GetLanguageByShebang` reads only the first line of text to identify the [shebang](<https://en.wikipedia.org/wiki/Shebang_(Unix)>).
|
||||
- `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text.
|
||||
- `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist.
|
||||
|
||||
It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy.
|
||||
|
||||
### By file
|
||||
|
||||
The most accurate guess would be one when both, the file name and the content are available:
|
||||
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
|
||||
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.
|
||||
|
||||
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
|
||||
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.
|
||||
|
||||
### Filtering: vendoring, binaries, etc
|
||||
*enry* exposes a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis:
|
||||
- `IsBinary`
|
||||
- `IsVendor`
|
||||
- `IsConfiguration`
|
||||
- `IsDocumentation`
|
||||
- `IsDotFile`
|
||||
- `IsImage`
|
||||
|
||||
_enry_ exposes a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis:
|
||||
|
||||
- `IsBinary`
|
||||
- `IsVendor`
|
||||
- `IsConfiguration`
|
||||
- `IsDocumentation`
|
||||
- `IsDotFile`
|
||||
- `IsImage`
|
||||
- `IsTest`
|
||||
- `IsGenerated`
|
||||
|
||||
### Language colors and groups
|
||||
*enry* exposes functions to get a language's color and group, for example to use when presenting statistics in graphs:
|
||||
- `GetColor`
|
||||
- `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS`
|
||||
|
||||
_enry_ exposes functions to get a language's color and group, for example to use when presenting statistics in graphs:
|
||||
|
||||
- `GetColor`
|
||||
- `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS`
|
||||
|
||||
## Languages
|
||||
|
||||
@ -136,39 +148,36 @@ Generated Python bindings using a C shared library and cffi are WIP under [src-d
|
||||
A library is going to be published on pypi as [enry](https://pypi.org/project/enry/) for
|
||||
macOS and linux platforms. Windows support is planned under [src-d/enry#150](https://github.com/src-d/enry/issues/150).
|
||||
|
||||
Divergences from Linguist
|
||||
------------
|
||||
## Divergences from Linguist
|
||||
|
||||
The `enry` library is based on the data from `github/linguist` version **v7.9.0**.
|
||||
|
||||
Parsing [linguist/samples](https://github.com/github/linguist/tree/master/samples) the following `enry` results are different from the Linguist:
|
||||
|
||||
* [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine.
|
||||
- [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine.
|
||||
|
||||
* [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine.
|
||||
- [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine.
|
||||
|
||||
* [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine.
|
||||
- [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine.
|
||||
|
||||
* As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
|
||||
- As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
|
||||
|
||||
* Bayesian classifier can't distinguish "SQL" from "PLpgSQL". See [#194](https://github.com/src-d/enry/issues/194).
|
||||
- Bayesian classifier can't distinguish "SQL" from "PLpgSQL". See [#194](https://github.com/src-d/enry/issues/194).
|
||||
|
||||
* Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
|
||||
- Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
|
||||
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
|
||||
|
||||
* XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
|
||||
- XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
|
||||
|
||||
* Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
|
||||
- Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
|
||||
|
||||
* `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
|
||||
- `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
|
||||
|
||||
In all the cases above that have an issue number - we plan to update enry to match Linguist behavior.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Benchmarks
|
||||
------------
|
||||
|
||||
Enry's language detection has been compared with Linguist's on [*linguist/samples*](https://github.com/github/linguist/tree/master/samples).
|
||||
Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples).
|
||||
|
||||
We got these results:
|
||||
|
||||
@ -182,9 +191,7 @@ Go regexp engine being slower than Ruby's on, wich is based on [oniguruma](https
|
||||
|
||||
See [instructions](#misc) for running enry with oniguruma.
|
||||
|
||||
|
||||
Why Enry?
|
||||
------------
|
||||
## Why Enry?
|
||||
|
||||
In the movie [My Fair Lady](https://en.wikipedia.org/wiki/My_Fair_Lady), [Professor Henry Higgins](http://www.imdb.com/character/ch0011719/) is a linguist who at the very beginning of the movie enjoys guessing the origin of people based on their accent.
|
||||
|
||||
@ -199,10 +206,9 @@ To run the tests use:
|
||||
Setting `ENRY_TEST_REPO` to the path to an existing checkout of Linguist will avoid cloning it and speed tests up.
|
||||
Setting `ENRY_DEBUG=1` will provide insight in the Bayesian classifier building done by `make code-generate`.
|
||||
|
||||
|
||||
### Sync with github/linguist upstream
|
||||
|
||||
*enry* re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures.
|
||||
_enry_ re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures.
|
||||
In order to update to the latest release of linguist do:
|
||||
|
||||
```bash
|
||||
@ -217,10 +223,10 @@ $ make code-generate
|
||||
|
||||
To stay in sync, enry needs to be updated when a new release of the linguist includes changes to any of the following files:
|
||||
|
||||
* [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml)
|
||||
* [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml)
|
||||
* [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml)
|
||||
* [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml)
|
||||
- [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml)
|
||||
- [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml)
|
||||
- [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml)
|
||||
- [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml)
|
||||
|
||||
There is no automation for detecting the changes in the linguist project, so this process above has to be done manually from time to time.
|
||||
|
||||
@ -229,8 +235,6 @@ the generated files (in [data](https://github.com/go-enry/go-enry/blob/master/da
|
||||
|
||||
Separating all the necessary "manual" code changes to a different PR that includes some background description and an update to the documentation on ["divergences from linguist"](#divergences-from-linguist) is very much appreciated as it simplifies the maintenance (review/release notes/etc).
|
||||
|
||||
|
||||
|
||||
## Misc
|
||||
|
||||
<details>
|
||||
@ -238,19 +242,20 @@ Separating all the necessary "manual" code changes to a different PR that includ
|
||||
|
||||
### Benchmark
|
||||
|
||||
All benchmark scripts are in [*benchmarks*](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory.
|
||||
|
||||
All benchmark scripts are in [_benchmarks_](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory.
|
||||
|
||||
#### Dependencies
|
||||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
|
||||
- Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
||||
- Docker
|
||||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
|
||||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
|
||||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
|
||||
|
||||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
|
||||
|
||||
- Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
||||
- Docker
|
||||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
|
||||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
|
||||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
|
||||
|
||||
#### Quick benchmark
|
||||
|
||||
To run quicker benchmarks
|
||||
|
||||
make benchmarks
|
||||
@ -259,19 +264,20 @@ to get average times for the primary detection function and strategies for the w
|
||||
|
||||
make benchmarks-samples
|
||||
|
||||
|
||||
#### Full benchmark
|
||||
|
||||
If you want to reproduce the same benchmarks as reported above:
|
||||
- Make sure all [dependencies](#benchmark-dependencies) are installed
|
||||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
|
||||
- Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h)
|
||||
|
||||
- Make sure all [dependencies](#benchmark-dependencies) are installed
|
||||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
|
||||
- Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h)
|
||||
|
||||
It will run the benchmarks for enry and Linguist, parse the output, create csv files and plot the histogram.
|
||||
|
||||
### Faster regexp engine (optional)
|
||||
|
||||
[Oniguruma](https://github.com/kkos/oniguruma) is CRuby's regular expression engine.
|
||||
It is very fast and performs better than the one built into Go runtime. *enry* supports swapping
|
||||
It is very fast and performs better than the one built into Go runtime. _enry_ supports swapping
|
||||
between those two engines thanks to [rubex](https://github.com/moovweb/rubex) project.
|
||||
The typical overall speedup from using Oniguruma is 1.5-2x. However, it requires CGo and the external shared library.
|
||||
On macOS with [Homebrew](https://brew.sh/), it is:
|
||||
@ -296,8 +302,6 @@ and then rebuild the project.
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
License
|
||||
------------
|
||||
## License
|
||||
|
||||
Apache License, Version 2.0. See [LICENSE](LICENSE)
|
||||
|
12
vendor/github.com/go-enry/go-enry/v2/common.go
generated
vendored
12
vendor/github.com/go-enry/go-enry/v2/common.go
generated
vendored
@ -328,15 +328,13 @@ func getInterpreter(data []byte) (interpreter string) {
|
||||
return
|
||||
}
|
||||
|
||||
func getFirstLine(data []byte) []byte {
|
||||
buf := bufio.NewScanner(bytes.NewReader(data))
|
||||
buf.Scan()
|
||||
line := buf.Bytes()
|
||||
if err := buf.Err(); err != nil {
|
||||
return nil
|
||||
func getFirstLine(content []byte) []byte {
|
||||
nlpos := bytes.IndexByte(content, '\n')
|
||||
if nlpos < 0 {
|
||||
return content
|
||||
}
|
||||
|
||||
return line
|
||||
return content[:nlpos]
|
||||
}
|
||||
|
||||
func hasShebang(line []byte) bool {
|
||||
|
40
vendor/github.com/go-enry/go-enry/v2/data/documentation.go
generated
vendored
40
vendor/github.com/go-enry/go-enry/v2/data/documentation.go
generated
vendored
@ -3,24 +3,24 @@
|
||||
|
||||
package data
|
||||
|
||||
import "gopkg.in/toqueteos/substring.v1"
|
||||
import "github.com/go-enry/go-enry/v2/regex"
|
||||
|
||||
var DocumentationMatchers = substring.Or(
|
||||
substring.Regexp(`^[Dd]ocs?/`),
|
||||
substring.Regexp(`(^|/)[Dd]ocumentation/`),
|
||||
substring.Regexp(`(^|/)[Gg]roovydoc/`),
|
||||
substring.Regexp(`(^|/)[Jj]avadoc/`),
|
||||
substring.Regexp(`^[Mm]an/`),
|
||||
substring.Regexp(`^[Ee]xamples/`),
|
||||
substring.Regexp(`^[Dd]emos?/`),
|
||||
substring.Regexp(`(^|/)inst/doc/`),
|
||||
substring.Regexp(`(^|/)CHANGE(S|LOG)?(\.|$)`),
|
||||
substring.Regexp(`(^|/)CONTRIBUTING(\.|$)`),
|
||||
substring.Regexp(`(^|/)COPYING(\.|$)`),
|
||||
substring.Regexp(`(^|/)INSTALL(\.|$)`),
|
||||
substring.Regexp(`(^|/)LICEN[CS]E(\.|$)`),
|
||||
substring.Regexp(`(^|/)[Ll]icen[cs]e(\.|$)`),
|
||||
substring.Regexp(`(^|/)README(\.|$)`),
|
||||
substring.Regexp(`(^|/)[Rr]eadme(\.|$)`),
|
||||
substring.Regexp(`^[Ss]amples?/`),
|
||||
)
|
||||
var DocumentationMatchers = []regex.EnryRegexp{
|
||||
regex.MustCompile(`^[Dd]ocs?/`),
|
||||
regex.MustCompile(`(^|/)[Dd]ocumentation/`),
|
||||
regex.MustCompile(`(^|/)[Gg]roovydoc/`),
|
||||
regex.MustCompile(`(^|/)[Jj]avadoc/`),
|
||||
regex.MustCompile(`^[Mm]an/`),
|
||||
regex.MustCompile(`^[Ee]xamples/`),
|
||||
regex.MustCompile(`^[Dd]emos?/`),
|
||||
regex.MustCompile(`(^|/)inst/doc/`),
|
||||
regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`),
|
||||
regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`),
|
||||
regex.MustCompile(`(^|/)COPYING(\.|$)`),
|
||||
regex.MustCompile(`(^|/)INSTALL(\.|$)`),
|
||||
regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`),
|
||||
regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`),
|
||||
regex.MustCompile(`(^|/)README(\.|$)`),
|
||||
regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`),
|
||||
regex.MustCompile(`^[Ss]amples?/`),
|
||||
}
|
||||
|
823
vendor/github.com/go-enry/go-enry/v2/data/generated.go
generated
vendored
Normal file
823
vendor/github.com/go-enry/go-enry/v2/data/generated.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
17
vendor/github.com/go-enry/go-enry/v2/data/test.go
generated
vendored
Normal file
17
vendor/github.com/go-enry/go-enry/v2/data/test.go
generated
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
package data
|
||||
|
||||
import "github.com/go-enry/go-enry/v2/regex"
|
||||
|
||||
// TestMatchers is a hand-made collection of regexps used by the function `enry.IsTest`
|
||||
// to identify test files in different languages.
|
||||
var TestMatchers = []regex.EnryRegexp{
|
||||
regex.MustCompile(`(^|/)tests/.*Test\.php$`),
|
||||
regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`),
|
||||
regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`),
|
||||
regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`),
|
||||
regex.MustCompile(`(^|/)test_.*\.py$`),
|
||||
regex.MustCompile(`(^|/).*_test\.go$`),
|
||||
regex.MustCompile(`(^|/).*_(test|spec)\.rb$`),
|
||||
regex.MustCompile(`(^|/).*Test(s?)\.cs$`),
|
||||
regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`),
|
||||
}
|
326
vendor/github.com/go-enry/go-enry/v2/data/vendor.go
generated
vendored
326
vendor/github.com/go-enry/go-enry/v2/data/vendor.go
generated
vendored
File diff suppressed because it is too large
Load Diff
4
vendor/github.com/go-enry/go-enry/v2/go.mod
generated
vendored
4
vendor/github.com/go-enry/go-enry/v2/go.mod
generated
vendored
@ -3,9 +3,7 @@ module github.com/go-enry/go-enry/v2
|
||||
go 1.14
|
||||
|
||||
require (
|
||||
github.com/go-enry/go-oniguruma v1.2.0
|
||||
github.com/go-enry/go-oniguruma v1.2.1
|
||||
github.com/stretchr/testify v1.3.0
|
||||
github.com/toqueteos/trie v1.0.0 // indirect
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2
|
||||
gopkg.in/yaml.v2 v2.2.8
|
||||
)
|
||||
|
6
vendor/github.com/go-enry/go-enry/v2/go.sum
generated
vendored
6
vendor/github.com/go-enry/go-enry/v2/go.sum
generated
vendored
@ -2,17 +2,15 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs=
|
||||
github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
|
||||
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk=
|
||||
github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE=
|
||||
gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew=
|
||||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
53
vendor/github.com/go-enry/go-enry/v2/utils.go
generated
vendored
53
vendor/github.com/go-enry/go-enry/v2/utils.go
generated
vendored
@ -6,12 +6,18 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/go-enry/go-enry/v2/data"
|
||||
"github.com/go-enry/go-enry/v2/regex"
|
||||
)
|
||||
|
||||
const binSniffLen = 8000
|
||||
|
||||
var configurationLanguages = map[string]bool{
|
||||
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
|
||||
var configurationLanguages = map[string]struct{}{
|
||||
"XML": {},
|
||||
"JSON": {},
|
||||
"TOML": {},
|
||||
"YAML": {},
|
||||
"INI": {},
|
||||
"SQL": {},
|
||||
}
|
||||
|
||||
// IsConfiguration tells if filename is in one of the configuration languages.
|
||||
@ -46,7 +52,7 @@ func GetMIMEType(path string, language string) string {
|
||||
|
||||
// IsDocumentation returns whether or not path is a documentation path.
|
||||
func IsDocumentation(path string) bool {
|
||||
return data.DocumentationMatchers.Match(path)
|
||||
return matchRegexSlice(data.DocumentationMatchers, path)
|
||||
}
|
||||
|
||||
// IsDotFile returns whether or not path has dot as a prefix.
|
||||
@ -57,7 +63,12 @@ func IsDotFile(path string) bool {
|
||||
|
||||
// IsVendor returns whether or not path is a vendor path.
|
||||
func IsVendor(path string) bool {
|
||||
return data.VendorMatchers.Match(path)
|
||||
return matchRegexSlice(data.VendorMatchers, path)
|
||||
}
|
||||
|
||||
// IsTest returns whether or not path is a test path.
|
||||
func IsTest(path string) bool {
|
||||
return matchRegexSlice(data.TestMatchers, path)
|
||||
}
|
||||
|
||||
// IsBinary detects if data is a binary value based on:
|
||||
@ -86,3 +97,37 @@ func GetColor(language string) string {
|
||||
|
||||
return "#cccccc"
|
||||
}
|
||||
|
||||
func matchRegexSlice(exprs []regex.EnryRegexp, str string) bool {
|
||||
for _, expr := range exprs {
|
||||
if expr.MatchString(str) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// IsGenerated returns whether the file with the given path and content is a
|
||||
// generated file.
|
||||
func IsGenerated(path string, content []byte) bool {
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
if _, ok := data.GeneratedCodeExtensions[ext]; ok {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, m := range data.GeneratedCodeNameMatchers {
|
||||
if m(path) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
path = strings.ToLower(path)
|
||||
for _, m := range data.GeneratedCodeMatchers {
|
||||
if m(path, ext, content) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
22
vendor/github.com/go-enry/go-oniguruma/chelper.c
generated
vendored
22
vendor/github.com/go-enry/go-oniguruma/chelper.c
generated
vendored
@ -7,7 +7,7 @@
|
||||
#include "chelper.h"
|
||||
|
||||
int NewOnigRegex( char *pattern, int pattern_length, int option,
|
||||
OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
|
||||
OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
|
||||
int ret = ONIG_NORMAL;
|
||||
int error_msg_len = 0;
|
||||
|
||||
@ -23,8 +23,6 @@ int NewOnigRegex( char *pattern, int pattern_length, int option,
|
||||
|
||||
memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
|
||||
|
||||
*region = onig_region_new();
|
||||
|
||||
ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info);
|
||||
|
||||
if (ret != ONIG_NORMAL) {
|
||||
@ -38,9 +36,10 @@ int NewOnigRegex( char *pattern, int pattern_length, int option,
|
||||
}
|
||||
|
||||
int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
|
||||
OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
|
||||
int ret = ONIG_MISMATCH;
|
||||
int error_msg_len = 0;
|
||||
OnigRegion *region;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
@ -55,6 +54,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option);
|
||||
if (ret < 0 && error_buffer != NULL) {
|
||||
error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
|
||||
@ -74,6 +75,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
*numCaptures = count;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1);
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
@ -83,9 +86,10 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
}
|
||||
|
||||
int MatchOnigRegex(void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region) {
|
||||
OnigRegex regex) {
|
||||
int ret = ONIG_MISMATCH;
|
||||
int error_msg_len = 0;
|
||||
OnigRegion *region;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
@ -98,7 +102,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option,
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
region = onig_region_new();
|
||||
ret = onig_match(regex, str_start, str_end, search_start, region, option);
|
||||
onig_region_free(region, 1);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
@ -108,8 +114,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option,
|
||||
}
|
||||
|
||||
int LookupOnigCaptureByName(char *name, int name_length,
|
||||
OnigRegex regex, OnigRegion *region) {
|
||||
OnigRegex regex) {
|
||||
int ret = ONIGERR_UNDEFINED_NAME_REFERENCE;
|
||||
OnigRegion *region;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
@ -119,7 +126,9 @@ int LookupOnigCaptureByName(char *name, int name_length,
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
region = onig_region_new();
|
||||
ret = onig_name_to_backref_number(regex, name_start, name_end, region);
|
||||
onig_region_free(region, 1);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
@ -181,4 +190,3 @@ int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbe
|
||||
onig_foreach_name(reg, name_callback, (void* )&groupInfo);
|
||||
return groupInfo.bufferOffset;
|
||||
}
|
||||
|
||||
|
8
vendor/github.com/go-enry/go-oniguruma/chelper.h
generated
vendored
8
vendor/github.com/go-enry/go-oniguruma/chelper.h
generated
vendored
@ -1,14 +1,14 @@
|
||||
#include <oniguruma.h>
|
||||
|
||||
extern int NewOnigRegex( char *pattern, int pattern_length, int option,
|
||||
OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);
|
||||
OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);
|
||||
|
||||
extern int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);
|
||||
OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);
|
||||
|
||||
extern int MatchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region);
|
||||
OnigRegex regex);
|
||||
|
||||
extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region);
|
||||
extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex);
|
||||
|
||||
extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers);
|
||||
|
325
vendor/github.com/go-enry/go-oniguruma/regex.go
generated
vendored
325
vendor/github.com/go-enry/go-oniguruma/regex.go
generated
vendored
File diff suppressed because it is too large
Load Diff
22
vendor/github.com/toqueteos/trie/LICENSE.txt
generated
vendored
22
vendor/github.com/toqueteos/trie/LICENSE.txt
generated
vendored
@ -1,22 +0,0 @@
|
||||
Copyright (c) 2013 Caleb Spare
|
||||
|
||||
MIT License
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
7
vendor/github.com/toqueteos/trie/README.md
generated
vendored
7
vendor/github.com/toqueteos/trie/README.md
generated
vendored
@ -1,7 +0,0 @@
|
||||
# Trie
|
||||
|
||||
[GoDoc](http://godoc.org/github.com/toqueteos/trie)
|
||||
|
||||
This is a fork of https://github.com/cespare/go-trie that adds the `PrefixIndex` method.
|
||||
|
||||
It's required for https://github.com/toqueteos/substring.
|
1
vendor/github.com/toqueteos/trie/go.mod
generated
vendored
1
vendor/github.com/toqueteos/trie/go.mod
generated
vendored
@ -1 +0,0 @@
|
||||
module github.com/toqueteos/trie
|
102
vendor/github.com/toqueteos/trie/trie.go
generated
vendored
102
vendor/github.com/toqueteos/trie/trie.go
generated
vendored
@ -1,102 +0,0 @@
|
||||
// Package trie is an implementation of a trie (prefix tree) data structure over byte slices. It provides a
|
||||
// small and simple API for usage as a set as well as a 'Node' API for walking the trie.
|
||||
package trie
|
||||
|
||||
// A Trie is a prefix tree over byte slices.
type Trie struct {
	root *Node
}

// New constructs a new, empty Trie ready for use.
func New() *Trie {
	return &Trie{
		root: &Node{},
	}
}

// Insert puts b into the Trie. It returns true if the element was not previously in t.
func (t *Trie) Insert(b []byte) bool {
	n := t.root
	for _, c := range b {
		next, ok := n.Walk(c)
		if !ok {
			// No edge for c yet: create the child and record that n now has children.
			next = &Node{}
			n.branches[c] = next
			n.hasChildren = true
		}
		n = next
	}
	if n.terminal {
		return false
	}
	n.terminal = true
	return true
}

// Contains checks t for membership of b.
func (t *Trie) Contains(b []byte) bool {
	n := t.root
	for _, c := range b {
		next, ok := n.Walk(c)
		if !ok {
			return false
		}
		n = next
	}
	return n.terminal
}

// PrefixIndex walks through b until a stored element that is a prefix of b is
// found (a terminal node is reached) or b is exhausted.
//
// On success it returns the index in b of the last byte of the shortest such
// prefix (so the prefix is b[:idx+1]). It returns -1 if the walk leaves the
// trie or b runs out before a terminal node is reached. The root's terminal
// flag is only consulted when b is empty: in that case the result is 0 if the
// empty slice was inserted and -1 otherwise.
func (t *Trie) PrefixIndex(b []byte) int {
	var idx int
	n := t.root
	for _, c := range b {
		next, ok := n.Walk(c)
		if !ok {
			return -1
		}
		if next.terminal {
			return idx
		}
		n = next
		idx++
	}
	if !n.terminal {
		idx = -1
	}
	return idx
}

// Root returns the root node of a Trie. A valid Trie (i.e., constructed with New), always has a non-nil root
// node.
func (t *Trie) Root() *Node {
	return t.root
}

// A Node represents a logical vertex in the trie structure.
type Node struct {
	branches    [256]*Node // one slot per possible next byte; nil means no edge
	terminal    bool       // true when the path from the root to this node is a stored element
	hasChildren bool       // true once any entry of branches has been set
}

// Walk returns the node reached along edge c, if one exists. The ok value indicates whether such a node
// exists.
func (n *Node) Walk(c byte) (next *Node, ok bool) {
	next = n.branches[c]
	return next, next != nil
}

// Terminal indicates whether n is terminal in the trie (that is, whether the path from the root to n
// represents an element in the set). For instance, if the root node is terminal, then []byte{} is in the
// trie.
func (n *Node) Terminal() bool {
	return n.terminal
}

// Leaf indicates whether n is a leaf node in the trie (that is, whether it has no children). A leaf node
// must be terminal (else it would not exist). Logically, if n is a leaf node then the []byte represented by
// the path from the root to n is not a proper prefix of any element of the trie.
func (n *Node) Leaf() bool {
	return !n.hasChildren
}
|
24
vendor/gopkg.in/toqueteos/substring.v1/.gitignore
generated
vendored
24
vendor/gopkg.in/toqueteos/substring.v1/.gitignore
generated
vendored
@ -1,24 +0,0 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
11
vendor/gopkg.in/toqueteos/substring.v1/.travis.yml
generated
vendored
11
vendor/gopkg.in/toqueteos/substring.v1/.travis.yml
generated
vendored
@ -1,11 +0,0 @@
|
||||
language: go
|
||||
|
||||
go:
|
||||
- 1.2
|
||||
- 1.3
|
||||
- 1.4
|
||||
- tip
|
||||
|
||||
script:
|
||||
- go get launchpad.net/gocheck
|
||||
- go test
|
22
vendor/gopkg.in/toqueteos/substring.v1/LICENSE
generated
vendored
22
vendor/gopkg.in/toqueteos/substring.v1/LICENSE
generated
vendored
@ -1,22 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Carlos Cobo
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
80
vendor/gopkg.in/toqueteos/substring.v1/README.md
generated
vendored
80
vendor/gopkg.in/toqueteos/substring.v1/README.md
generated
vendored
@ -1,80 +0,0 @@
|
||||
# substring [Build Status](https://travis-ci.org/toqueteos/substring) [GoDoc](http://godoc.org/github.com/toqueteos/substring) [Releases](https://github.com/toqueteos/substring/releases)
|
||||
|
||||
Simple and composable alternative to [regexp](http://golang.org/pkg/regexp/) package for fast substring searches.
|
||||
|
||||
## Installation
|
||||
|
||||
The recommended way to install substring:
|
||||
|
||||
```
|
||||
go get -t gopkg.in/toqueteos/substring.v1
|
||||
```
|
||||
|
||||
The `-t` flag is for fetching [gocheck](https://gopkg.in/check.v1), required for tests and benchmarks.
|
||||
|
||||
## Examples
|
||||
|
||||
A basic example with two matchers:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"gopkg.in/toqueteos/substring.v1"
|
||||
)
|
||||
|
||||
func main() {
|
||||
m1 := substring.After("assets/", substring.Or(
|
||||
substring.Has("jquery"),
|
||||
substring.Has("angular"),
|
||||
substring.Suffixes(".js", ".css", ".html"),
|
||||
))
|
||||
fmt.Println(m1.Match("assets/angular/foo/bar")) //Prints: true
|
||||
fmt.Println(m1.Match("assets/js/file.js")) //Prints: true
|
||||
fmt.Println(m1.Match("assets/style/bar.css")) //Prints: true
|
||||
fmt.Println(m1.Match("assets/foo/bar.html")) //Prints: true
|
||||
fmt.Println(m1.Match("assets/js/qux.json")) //Prints: false
|
||||
fmt.Println(m1.Match("core/file.html")) //Prints: false
|
||||
fmt.Println(m1.Match("foobar/that.jsx")) //Prints: false
|
||||
|
||||
m2 := substring.After("vendor/", substring.Suffixes(".css", ".js", ".less"))
|
||||
|
||||
fmt.Println(m2.Match("foo/vendor/bar/qux.css")) //Prints: true
|
||||
fmt.Println(m2.Match("foo/var/qux.less")) //Prints: false
|
||||
|
||||
re := regexp.MustCompile(`vendor\/.*\.(css|js|less)$`)
|
||||
fmt.Println(re.MatchString("foo/vendor/bar/qux.css")) //Prints: true
|
||||
fmt.Println(re.MatchString("foo/var/qux.less")) //Prints: false
|
||||
}
|
||||
```
|
||||
|
||||
## How fast?
|
||||
|
||||
It may vary depending on your use case but 1~2 orders of magnitude faster than `regexp` is pretty common.
|
||||
|
||||
Test it out for yourself by running `go test -check.b`!
|
||||
|
||||
```
|
||||
$ go test -check.b
|
||||
PASS: lib_test.go:18: LibSuite.BenchmarkExample1 10000000 221 ns/op
|
||||
PASS: lib_test.go:23: LibSuite.BenchmarkExample2 10000000 229 ns/op
|
||||
PASS: lib_test.go:28: LibSuite.BenchmarkExample3 10000000 216 ns/op
|
||||
PASS: lib_test.go:33: LibSuite.BenchmarkExample4 10000000 208 ns/op
|
||||
PASS: lib_test.go:38: LibSuite.BenchmarkExample5 20000000 82.1 ns/op
|
||||
PASS: lib_test.go:48: LibSuite.BenchmarkExampleRe1 500000 4136 ns/op
|
||||
PASS: lib_test.go:53: LibSuite.BenchmarkExampleRe2 500000 5222 ns/op
|
||||
PASS: lib_test.go:58: LibSuite.BenchmarkExampleRe3 500000 5116 ns/op
|
||||
PASS: lib_test.go:63: LibSuite.BenchmarkExampleRe4 500000 4020 ns/op
|
||||
PASS: lib_test.go:68: LibSuite.BenchmarkExampleRe5 10000000 226 ns/op
|
||||
OK: 10 passed
|
||||
PASS
|
||||
ok gopkg.in/toqueteos/substring.v1 23.471s
|
||||
```
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
MIT, see [LICENSE](LICENSE)
|
229
vendor/gopkg.in/toqueteos/substring.v1/bytes.go
generated
vendored
229
vendor/gopkg.in/toqueteos/substring.v1/bytes.go
generated
vendored
@ -1,229 +0,0 @@
|
||||
package substring
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
|
||||
"github.com/toqueteos/trie"
|
||||
)
|
||||
|
||||
// BytesMatcher is the interface satisfied by the []byte matchers in this file.
type BytesMatcher interface {
	// Match reports whether b satisfies the matcher.
	Match(b []byte) bool
	// MatchIndex returns a non-negative index when b satisfies the matcher
	// (its exact meaning depends on the matcher) and -1 otherwise.
	MatchIndex(b []byte) int
}
|
||||
|
||||
// regexpBytes matches input against a compiled regular expression.
type regexpBytes struct{ re *regexp.Regexp }

// BytesRegexp returns a matcher for the regular expression pat. It panics if
// pat does not compile, because it uses regexp.MustCompile.
func BytesRegexp(pat string) *regexpBytes { return &regexpBytes{regexp.MustCompile(pat)} }

// Match reports whether the expression matches anywhere in b.
func (m *regexpBytes) Match(b []byte) bool { return m.re.Match(b) }

// MatchIndex returns the end offset of the leftmost match in b, or -1 if the
// expression does not match.
func (m *regexpBytes) MatchIndex(b []byte) int {
	if loc := m.re.FindIndex(b); loc != nil {
		return loc[1]
	}
	return -1
}
|
||||
|
||||
// exactBytes matches input that is byte-for-byte equal to pat.
type exactBytes struct{ pat []byte }

// BytesExact returns a matcher that accepts only input equal to pat.
func BytesExact(pat string) *exactBytes { return &exactBytes{[]byte(pat)} }

// Match reports whether b has the same length and contents as the pattern.
func (m *exactBytes) Match(b []byte) bool { return bytes.Equal(m.pat, b) }

// MatchIndex returns len(b) when b equals the pattern and -1 otherwise.
func (m *exactBytes) MatchIndex(b []byte) int {
	if m.Match(b) {
		return len(b)
	}
	return -1
}
|
||||
|
||||
// anyBytes searches for the input inside the pattern: it matches when the
// bytes handed to Match occur somewhere in pat.
type anyBytes struct {
	pat []byte
}

// BytesAny returns a matcher that accepts any input contained in pat.
func BytesAny(pat string) *anyBytes { return &anyBytes{[]byte(pat)} }

// Match reports whether b occurs within the pattern.
func (m *anyBytes) Match(b []byte) bool { return bytes.Contains(m.pat, b) }

// MatchIndex returns the offset within the pattern just past the first
// occurrence of b, or -1 if b does not occur in the pattern.
func (m *anyBytes) MatchIndex(b []byte) int {
	if idx := bytes.Index(m.pat, b); idx >= 0 {
		return idx + len(b)
	}
	return -1
}
|
||||
|
||||
// hasBytes searches for the pattern inside the input: it matches when pat
// occurs somewhere in the bytes handed to Match.
type hasBytes struct {
	pat []byte
}

// BytesHas returns a matcher that accepts any input containing pat.
func BytesHas(pat string) *hasBytes { return &hasBytes{[]byte(pat)} }

// Match reports whether the pattern occurs within b.
func (m *hasBytes) Match(b []byte) bool { return bytes.Contains(b, m.pat) }

// MatchIndex returns the offset in b just past the first occurrence of the
// pattern, or -1 if the pattern does not occur in b.
func (m *hasBytes) MatchIndex(b []byte) int {
	if idx := bytes.Index(b, m.pat); idx >= 0 {
		return idx + len(m.pat)
	}
	return -1
}
|
||||
|
||||
// prefixBytes matches input that starts with pat.
type prefixBytes struct{ pat []byte }

// BytesPrefix returns a matcher that accepts input beginning with pat.
func BytesPrefix(pat string) *prefixBytes {
	return &prefixBytes{pat: []byte(pat)}
}

// Match reports whether b begins with the pattern.
func (m *prefixBytes) Match(b []byte) bool {
	return bytes.HasPrefix(b, m.pat)
}

// MatchIndex returns the pattern length when b begins with the pattern and -1
// otherwise.
func (m *prefixBytes) MatchIndex(b []byte) int {
	if !bytes.HasPrefix(b, m.pat) {
		return -1
	}
	return len(m.pat)
}
|
||||
|
||||
// prefixes
|
||||
type prefixesBytes struct {
|
||||
t *trie.Trie
|
||||
}
|
||||
|
||||
func BytesPrefixes(pats ...string) *prefixesBytes {
|
||||
t := trie.New()
|
||||
for _, pat := range pats {
|
||||
t.Insert([]byte(pat))
|
||||
}
|
||||
return &prefixesBytes{t}
|
||||
}
|
||||
func (m *prefixesBytes) Match(b []byte) bool { return m.t.PrefixIndex(b) >= 0 }
|
||||
func (m *prefixesBytes) MatchIndex(b []byte) int {
|
||||
if idx := m.t.PrefixIndex(b); idx >= 0 {
|
||||
return idx
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// suffixBytes matches input that ends with pat.
type suffixBytes struct{ pat []byte }

// BytesSuffix returns a matcher that accepts input ending with pat.
func BytesSuffix(pat string) *suffixBytes {
	return &suffixBytes{pat: []byte(pat)}
}

// Match reports whether b ends with the pattern.
func (m *suffixBytes) Match(b []byte) bool {
	return bytes.HasSuffix(b, m.pat)
}

// MatchIndex returns the pattern length (not an offset into b) when b ends
// with the pattern and -1 otherwise.
func (m *suffixBytes) MatchIndex(b []byte) int {
	if !bytes.HasSuffix(b, m.pat) {
		return -1
	}
	return len(m.pat)
}
|
||||
|
||||
// suffixes
|
||||
type suffixesBytes struct {
|
||||
t *trie.Trie
|
||||
}
|
||||
|
||||
func BytesSuffixes(pats ...string) *suffixesBytes {
|
||||
t := trie.New()
|
||||
for _, pat := range pats {
|
||||
t.Insert(reverse([]byte(pat)))
|
||||
}
|
||||
return &suffixesBytes{t}
|
||||
}
|
||||
func (m *suffixesBytes) Match(b []byte) bool {
|
||||
return m.t.PrefixIndex(reverse(b)) >= 0
|
||||
}
|
||||
func (m *suffixesBytes) MatchIndex(b []byte) int {
|
||||
if idx := m.t.PrefixIndex(reverse(b)); idx >= 0 {
|
||||
return idx
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// after
|
||||
type afterBytes struct {
|
||||
first []byte
|
||||
matcher BytesMatcher
|
||||
}
|
||||
|
||||
func BytesAfter(first string, m BytesMatcher) *afterBytes { return &afterBytes{[]byte(first), m} }
|
||||
func (a *afterBytes) Match(b []byte) bool {
|
||||
if idx := bytes.Index(b, a.first); idx >= 0 {
|
||||
return a.matcher.Match(b[idx+len(a.first):])
|
||||
}
|
||||
return false
|
||||
}
|
||||
func (a *afterBytes) MatchIndex(b []byte) int {
|
||||
if idx := bytes.Index(b, a.first); idx >= 0 {
|
||||
return idx + a.matcher.MatchIndex(b[idx:])
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// and, returns true iff all matchers return true
|
||||
type andBytes struct{ matchers []BytesMatcher }
|
||||
|
||||
func BytesAnd(m ...BytesMatcher) *andBytes { return &andBytes{m} }
|
||||
func (a *andBytes) Match(b []byte) bool {
|
||||
for _, m := range a.matchers {
|
||||
if !m.Match(b) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
func (a *andBytes) MatchIndex(b []byte) int {
|
||||
longest := 0
|
||||
for _, m := range a.matchers {
|
||||
if idx := m.MatchIndex(b); idx < 0 {
|
||||
return -1
|
||||
} else if idx > longest {
|
||||
longest = idx
|
||||
}
|
||||
}
|
||||
return longest
|
||||
}
|
||||
|
||||
// or, returns true iff any matcher returns true
|
||||
type orBytes struct{ matchers []BytesMatcher }
|
||||
|
||||
func BytesOr(m ...BytesMatcher) *orBytes { return &orBytes{m} }
|
||||
func (o *orBytes) Match(b []byte) bool {
|
||||
for _, m := range o.matchers {
|
||||
if m.Match(b) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
func (o *orBytes) MatchIndex(b []byte) int {
|
||||
for _, m := range o.matchers {
|
||||
if idx := m.MatchIndex(b); idx >= 0 {
|
||||
return idx
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
type suffixGroupBytes struct {
|
||||
suffix BytesMatcher
|
||||
matchers []BytesMatcher
|
||||
}
|
||||
|
||||
func BytesSuffixGroup(s string, m ...BytesMatcher) *suffixGroupBytes {
|
||||
return &suffixGroupBytes{BytesSuffix(s), m}
|
||||
}
|
||||
func (sg *suffixGroupBytes) Match(b []byte) bool {
|
||||
if sg.suffix.Match(b) {
|
||||
return BytesOr(sg.matchers...).Match(b)
|
||||
}
|
||||
return false
|
||||
}
|
||||
func (sg *suffixGroupBytes) MatchIndex(b []byte) int {
|
||||
if sg.suffix.MatchIndex(b) >= 0 {
|
||||
return BytesOr(sg.matchers...).MatchIndex(b)
|
||||
}
|
||||
return -1
|
||||
}
|
10
vendor/gopkg.in/toqueteos/substring.v1/lib.go
generated
vendored
10
vendor/gopkg.in/toqueteos/substring.v1/lib.go
generated
vendored
@ -1,10 +0,0 @@
|
||||
package substring
|
||||
|
||||
// reverse is a helper fn for Suffixes. It returns a new slice holding the
// bytes of b in reverse order; b itself is left untouched, so matchers can
// pass slices owned by their callers without corrupting them.
func reverse(b []byte) []byte {
	n := len(b)
	out := make([]byte, n)
	for i, c := range b {
		out[n-1-i] = c
	}
	return out
}
|
216
vendor/gopkg.in/toqueteos/substring.v1/string.go
generated
vendored
216
vendor/gopkg.in/toqueteos/substring.v1/string.go
generated
vendored
@ -1,216 +0,0 @@
|
||||
package substring
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/toqueteos/trie"
|
||||
)
|
||||
|
||||
// StringsMatcher is the interface satisfied by the string matchers in this
// file.
type StringsMatcher interface {
	// Match reports whether s satisfies the matcher.
	Match(s string) bool
	// MatchIndex returns a non-negative index when s satisfies the matcher
	// (its exact meaning depends on the matcher) and -1 otherwise.
	MatchIndex(s string) int
}
|
||||
|
||||
// regexpString matches input against a compiled regular expression.
type regexpString struct{ re *regexp.Regexp }

// Regexp returns a matcher for the regular expression pat. It panics if pat
// does not compile, because it uses regexp.MustCompile.
func Regexp(pat string) *regexpString { return &regexpString{regexp.MustCompile(pat)} }

// Match reports whether the expression matches anywhere in s.
func (m *regexpString) Match(s string) bool { return m.re.MatchString(s) }

// MatchIndex returns the end offset of the leftmost match in s, or -1 if the
// expression does not match.
func (m *regexpString) MatchIndex(s string) int {
	if loc := m.re.FindStringIndex(s); loc != nil {
		return loc[1]
	}
	return -1
}
|
||||
|
||||
// exactString matches input that is equal to pat.
type exactString struct{ pat string }

// Exact returns a matcher that accepts only input equal to pat.
func Exact(pat string) *exactString {
	return &exactString{pat: pat}
}

// Match reports whether s equals the pattern.
func (m *exactString) Match(s string) bool {
	return s == m.pat
}

// MatchIndex returns len(s) when s equals the pattern and -1 otherwise.
func (m *exactString) MatchIndex(s string) int {
	if s != m.pat {
		return -1
	}
	return len(s)
}
|
||||
|
||||
// anyString searches for the input inside the pattern: it matches when the
// string handed to Match occurs somewhere in pat.
type anyString struct{ pat string }

// Any returns a matcher that accepts any input contained in pat.
func Any(pat string) *anyString { return &anyString{pat} }

// Match reports whether s occurs within the pattern.
func (m *anyString) Match(s string) bool {
	return strings.Contains(m.pat, s)
}

// MatchIndex returns the offset within the pattern just past the first
// occurrence of s, or -1 if s does not occur in the pattern.
func (m *anyString) MatchIndex(s string) int {
	if idx := strings.Index(m.pat, s); idx >= 0 {
		return idx + len(s)
	}
	return -1
}
|
||||
|
||||
// hasString searches for the pattern inside the input: it matches when pat
// occurs somewhere in the string handed to Match.
type hasString struct{ pat string }

// Has returns a matcher that accepts any input containing pat.
func Has(pat string) *hasString { return &hasString{pat} }

// Match reports whether the pattern occurs within s.
func (m *hasString) Match(s string) bool {
	return strings.Contains(s, m.pat)
}

// MatchIndex returns the offset in s just past the first occurrence of the
// pattern, or -1 if the pattern does not occur in s.
func (m *hasString) MatchIndex(s string) int {
	if idx := strings.Index(s, m.pat); idx >= 0 {
		return idx + len(m.pat)
	}
	return -1
}
|
||||
|
||||
// prefixString matches input that starts with pat.
type prefixString struct{ pat string }

// Prefix returns a matcher that accepts input beginning with pat.
func Prefix(pat string) *prefixString {
	return &prefixString{pat: pat}
}

// Match reports whether s begins with the pattern.
func (m *prefixString) Match(s string) bool {
	return strings.HasPrefix(s, m.pat)
}

// MatchIndex returns the pattern length when s begins with the pattern and -1
// otherwise.
func (m *prefixString) MatchIndex(s string) int {
	if !strings.HasPrefix(s, m.pat) {
		return -1
	}
	return len(m.pat)
}
|
||||
|
||||
// prefixes
|
||||
type prefixesString struct{ t *trie.Trie }
|
||||
|
||||
func Prefixes(pats ...string) *prefixesString {
|
||||
t := trie.New()
|
||||
for _, pat := range pats {
|
||||
t.Insert([]byte(pat))
|
||||
}
|
||||
return &prefixesString{t}
|
||||
}
|
||||
func (m *prefixesString) Match(s string) bool { return m.t.PrefixIndex([]byte(s)) >= 0 }
|
||||
func (m *prefixesString) MatchIndex(s string) int {
|
||||
if idx := m.t.PrefixIndex([]byte(s)); idx >= 0 {
|
||||
return idx
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// suffixString matches input that ends with pat.
type suffixString struct{ pat string }

// Suffix returns a matcher that accepts input ending with pat.
func Suffix(pat string) *suffixString {
	return &suffixString{pat: pat}
}

// Match reports whether s ends with the pattern.
func (m *suffixString) Match(s string) bool {
	return strings.HasSuffix(s, m.pat)
}

// MatchIndex returns the pattern length (not an offset into s) when s ends
// with the pattern and -1 otherwise.
func (m *suffixString) MatchIndex(s string) int {
	if !strings.HasSuffix(s, m.pat) {
		return -1
	}
	return len(m.pat)
}
|
||||
|
||||
// suffixes
|
||||
type suffixesString struct{ t *trie.Trie }
|
||||
|
||||
func Suffixes(pats ...string) *suffixesString {
|
||||
t := trie.New()
|
||||
for _, pat := range pats {
|
||||
t.Insert(reverse([]byte(pat)))
|
||||
}
|
||||
return &suffixesString{t}
|
||||
}
|
||||
func (m *suffixesString) Match(s string) bool {
|
||||
return m.t.PrefixIndex(reverse([]byte(s))) >= 0
|
||||
}
|
||||
func (m *suffixesString) MatchIndex(s string) int {
|
||||
if idx := m.t.PrefixIndex(reverse([]byte(s))); idx >= 0 {
|
||||
return idx
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// after
|
||||
type afterString struct {
|
||||
first string
|
||||
matcher StringsMatcher
|
||||
}
|
||||
|
||||
func After(first string, m StringsMatcher) *afterString {
|
||||
return &afterString{first, m}
|
||||
}
|
||||
func (a *afterString) Match(s string) bool {
|
||||
if idx := strings.Index(s, a.first); idx >= 0 {
|
||||
return a.matcher.Match(s[idx+len(a.first):])
|
||||
}
|
||||
return false
|
||||
}
|
||||
func (a *afterString) MatchIndex(s string) int {
|
||||
if idx := strings.Index(s, a.first); idx >= 0 {
|
||||
return idx + a.matcher.MatchIndex(s[idx+len(a.first):])
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// and, returns true iff all matchers return true
|
||||
type andString struct{ matchers []StringsMatcher }
|
||||
|
||||
func And(m ...StringsMatcher) *andString { return &andString{m} }
|
||||
func (a *andString) Match(s string) bool {
|
||||
for _, m := range a.matchers {
|
||||
if !m.Match(s) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
func (a *andString) MatchIndex(s string) int {
|
||||
longest := 0
|
||||
for _, m := range a.matchers {
|
||||
if idx := m.MatchIndex(s); idx < 0 {
|
||||
return -1
|
||||
} else if idx > longest {
|
||||
longest = idx
|
||||
}
|
||||
}
|
||||
return longest
|
||||
}
|
||||
|
||||
// or, returns true iff any matcher returns true
|
||||
type orString struct{ matchers []StringsMatcher }
|
||||
|
||||
func Or(m ...StringsMatcher) *orString { return &orString{m} }
|
||||
func (o *orString) Match(s string) bool {
|
||||
for _, m := range o.matchers {
|
||||
if m.Match(s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
func (o *orString) MatchIndex(s string) int {
|
||||
for _, m := range o.matchers {
|
||||
if idx := m.MatchIndex(s); idx >= 0 {
|
||||
return idx
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
type suffixGroupString struct {
|
||||
suffix StringsMatcher
|
||||
matchers []StringsMatcher
|
||||
}
|
||||
|
||||
func SuffixGroup(s string, m ...StringsMatcher) *suffixGroupString {
|
||||
return &suffixGroupString{Suffix(s), m}
|
||||
}
|
||||
func (sg *suffixGroupString) Match(s string) bool {
|
||||
if sg.suffix.Match(s) {
|
||||
return Or(sg.matchers...).Match(s)
|
||||
}
|
||||
return false
|
||||
}
|
||||
func (sg *suffixGroupString) MatchIndex(s string) int {
|
||||
if sg.suffix.MatchIndex(s) >= 0 {
|
||||
return Or(sg.matchers...).MatchIndex(s)
|
||||
}
|
||||
return -1
|
||||
}
|
8
vendor/modules.txt
vendored
8
vendor/modules.txt
vendored
@ -202,7 +202,7 @@ github.com/gliderlabs/ssh
|
||||
# github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a
|
||||
## explicit
|
||||
github.com/glycerine/go-unsnap-stream
|
||||
# github.com/go-enry/go-enry/v2 v2.3.0
|
||||
# github.com/go-enry/go-enry/v2 v2.5.2
|
||||
## explicit
|
||||
github.com/go-enry/go-enry/v2
|
||||
github.com/go-enry/go-enry/v2/data
|
||||
@ -210,7 +210,7 @@ github.com/go-enry/go-enry/v2/data/rule
|
||||
github.com/go-enry/go-enry/v2/internal/tokenizer
|
||||
github.com/go-enry/go-enry/v2/internal/tokenizer/flex
|
||||
github.com/go-enry/go-enry/v2/regex
|
||||
# github.com/go-enry/go-oniguruma v1.2.0
|
||||
# github.com/go-enry/go-oniguruma v1.2.1
|
||||
github.com/go-enry/go-oniguruma
|
||||
# github.com/go-git/gcfg v1.5.0
|
||||
github.com/go-git/gcfg
|
||||
@ -614,8 +614,6 @@ github.com/syndtr/goleveldb/leveldb/util
|
||||
# github.com/tinylib/msgp v1.1.2
|
||||
## explicit
|
||||
github.com/tinylib/msgp/msgp
|
||||
# github.com/toqueteos/trie v1.0.0
|
||||
github.com/toqueteos/trie
|
||||
# github.com/toqueteos/webbrowser v1.2.0
|
||||
github.com/toqueteos/webbrowser
|
||||
# github.com/tstranex/u2f v1.0.0
|
||||
@ -836,8 +834,6 @@ gopkg.in/ldap.v3
|
||||
# gopkg.in/testfixtures.v2 v2.5.0
|
||||
## explicit
|
||||
gopkg.in/testfixtures.v2
|
||||
# gopkg.in/toqueteos/substring.v1 v1.0.2
|
||||
gopkg.in/toqueteos/substring.v1
|
||||
# gopkg.in/warnings.v0 v0.1.2
|
||||
gopkg.in/warnings.v0
|
||||
# gopkg.in/yaml.v2 v2.2.8
|
||||
|
Reference in New Issue
Block a user