Documentation ¶
Overview ¶
Package licenseclassifier provides methods to identify the open source license that most closely matches an unknown license.
Copyright 2017 Google Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2017 Google Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Index ¶
Constants ¶
const ( // LicenseArchive is the name of the archive containing preprocessed // license texts. LicenseArchive = "licenses.db" // ForbiddenLicenseArchive is the name of the archive containing preprocessed // forbidden license texts only. ForbiddenLicenseArchive = "forbidden_licenses.db" )
const ( // The names come from the https://spdx.org/licenses website, and are // also the filenames of the licenses in licenseclassifier/licenses. AFL11 = "AFL-1.1" AFL12 = "AFL-1.2" AFL20 = "AFL-2.0" AFL21 = "AFL-2.1" AFL30 = "AFL-3.0" AGPL10 = "AGPL-1.0" AGPL30 = "AGPL-3.0" Apache10 = "Apache-1.0" Apache11 = "Apache-1.1" Apache20 = "Apache-2.0" APSL10 = "APSL-1.0" APSL11 = "APSL-1.1" APSL12 = "APSL-1.2" APSL20 = "APSL-2.0" Artistic10cl8 = "Artistic-1.0-cl8" Artistic10Perl = "Artistic-1.0-Perl" Artistic10 = "Artistic-1.0" Artistic20 = "Artistic-2.0" BCL = "BCL" Beerware = "Beerware" BSD2ClauseFreeBSD = "BSD-2-Clause-FreeBSD" BSD2ClauseNetBSD = "BSD-2-Clause-NetBSD" BSD2Clause = "BSD-2-Clause" BSD3ClauseAttribution = "BSD-3-Clause-Attribution" BSD3ClauseClear = "BSD-3-Clause-Clear" BSD3ClauseLBNL = "BSD-3-Clause-LBNL" BSD3Clause = "BSD-3-Clause" BSD4Clause = "BSD-4-Clause" BSD4ClauseUC = "BSD-4-Clause-UC" BSDProtection = "BSD-Protection" BSL10 = "BSL-1.0" CC010 = "CC0-1.0" CCBY10 = "CC-BY-1.0" CCBY20 = "CC-BY-2.0" CCBY25 = "CC-BY-2.5" CCBY30 = "CC-BY-3.0" CCBY40 = "CC-BY-4.0" CCBYNC10 = "CC-BY-NC-1.0" CCBYNC20 = "CC-BY-NC-2.0" CCBYNC25 = "CC-BY-NC-2.5" CCBYNC30 = "CC-BY-NC-3.0" CCBYNC40 = "CC-BY-NC-4.0" CCBYNCND10 = "CC-BY-NC-ND-1.0" CCBYNCND20 = "CC-BY-NC-ND-2.0" CCBYNCND25 = "CC-BY-NC-ND-2.5" CCBYNCND30 = "CC-BY-NC-ND-3.0" CCBYNCND40 = "CC-BY-NC-ND-4.0" CCBYNCSA10 = "CC-BY-NC-SA-1.0" CCBYNCSA20 = "CC-BY-NC-SA-2.0" CCBYNCSA25 = "CC-BY-NC-SA-2.5" CCBYNCSA30 = "CC-BY-NC-SA-3.0" CCBYNCSA40 = "CC-BY-NC-SA-4.0" CCBYND10 = "CC-BY-ND-1.0" CCBYND20 = "CC-BY-ND-2.0" CCBYND25 = "CC-BY-ND-2.5" CCBYND30 = "CC-BY-ND-3.0" CCBYND40 = "CC-BY-ND-4.0" CCBYSA10 = "CC-BY-SA-1.0" CCBYSA20 = "CC-BY-SA-2.0" CCBYSA25 = "CC-BY-SA-2.5" CCBYSA30 = "CC-BY-SA-3.0" CCBYSA40 = "CC-BY-SA-4.0" CDDL10 = "CDDL-1.0" CDDL11 = "CDDL-1.1" CPAL10 = "CPAL-1.0" CPL10 = "CPL-1.0" EPL10 = "EPL-1.0" EUPL10 = "EUPL-1.0" EUPL11 = "EUPL-1.1" Facebook2Clause = "Facebook-2-Clause" Facebook3Clause = 
"Facebook-3-Clause" FacebookExamples = "Facebook-Examples" FreeImage = "FreeImage" FTL = "FTL" GPL10 = "GPL-1.0" GPL20 = "GPL-2.0" GPL20withautoconfexception = "GPL-2.0-with-autoconf-exception" GPL20withbisonexception = "GPL-2.0-with-bison-exception" GPL20withclasspathexception = "GPL-2.0-with-classpath-exception" GPL20withfontexception = "GPL-2.0-with-font-exception" GPL20withGCCexception = "GPL-2.0-with-GCC-exception" GPL30 = "GPL-3.0" GPL30withautoconfexception = "GPL-3.0-with-autoconf-exception" GPL30withGCCexception = "GPL-3.0-with-GCC-exception" ImageMagick = "ImageMagick" IPL10 = "IPL-1.0" ISC = "ISC" LGPL20 = "LGPL-2.0" LGPL21 = "LGPL-2.1" LGPL30 = "LGPL-3.0" LGPLLR = "LGPLLR" Libpng = "Libpng" LPL102 = "LPL-1.02" LPL10 = "LPL-1.0" MIT = "MIT" MPL10 = "MPL-1.0" MPL11 = "MPL-1.1" MPL20 = "MPL-2.0" MSPL = "MS-PL" NCSA = "NCSA" NPL10 = "NPL-1.0" NPL11 = "NPL-1.1" OpenSSL = "OpenSSL" OSL10 = "OSL-1.0" OSL11 = "OSL-1.1" OSL20 = "OSL-2.0" OSL21 = "OSL-2.1" OSL30 = "OSL-3.0" PHP301 = "PHP-3.01" PHP30 = "PHP-3.0" PIL = "PIL" Python20 = "Python-2.0" QPL10 = "QPL-1.0" Ruby = "Ruby" SGIB10 = "SGI-B-1.0" SGIB11 = "SGI-B-1.1" SGIB20 = "SGI-B-2.0" SISSL12 = "SISSL-1.2" SISSL = "SISSL" Sleepycat = "Sleepycat" UnicodeTOU = "Unicode-TOU" Unlicense = "Unlicense" W3C19980720 = "W3C-19980720" W3C = "W3C" WTFPL = "WTFPL" X11 = "X11" Xnet = "Xnet" Zend20 = "Zend-2.0" ZlibAcknowledgement = "zlib-acknowledgement" Zlib = "Zlib" ZPL11 = "ZPL-1.1" ZPL20 = "ZPL-2.0" ZPL21 = "ZPL-2.1" )
Canonical names of the licenses.
const DefaultConfidenceThreshold = 0.80
DefaultConfidenceThreshold is the minimum confidence we're willing to accept in order to say that a match is good. It is expressed as a fraction, so 0.80 corresponds to 80%. http://go/license-classifier-conf-threshold
Variables ¶
var LicenseDirectory = ""
LicenseDirectory is the directory where the prototype licenses are kept.
var ( // LicenseTypes is a set of the types of licenses Google recognizes. LicenseTypes = sets.NewStringSet( "restricted", "reciprocal", "notice", "permissive", "unencumbered", "by_exception_only", ) )
var ( // Normalizers is a list of functions that get applied to the strings // before they are registered with the string classifier. Normalizers = []stringclassifier.NormalizeFunc{ html.UnescapeString, removeShebangLine, removeNonWords, normalizeEquivalentWords, normalizePunctuation, strings.ToLower, removeIgnorableTexts, stringclassifier.FlattenWhitespace, strings.TrimSpace, } )
Functions ¶
func CopyrightHolder ¶
CopyrightHolder finds a copyright notification, if it exists, and returns the copyright holder.
func LicenseType ¶
LicenseType returns the type the license has.
func LicensesDir ¶
func LicensesDir(licensesDir string)
LicensesDir sets the directory where all licenses are located.
func ReadLicenseDir ¶
ReadLicenseDir reads directory containing the license files.
func ReadLicenseFile ¶
ReadLicenseFile locates and reads the license file.
func TrimExtraneousTrailingText ¶
TrimExtraneousTrailingText removes text that follows an obvious end of the license and that is not part of the substantive text of the license.
Types ¶
type License ¶
type License struct { // Threshold is the lowest confidence percentage acceptable for the // classifier. Threshold float64 // contains filtered or unexported fields }
License is a classifier pre-loaded with known open source licenses.
func NewWithForbiddenLicenses ¶
NewWithForbiddenLicenses creates a license classifier and pre-loads it with the known open source licenses that are forbidden.
func (*License) HasPublicDomainNotice ¶
HasPublicDomainNotice performs a simple regex over the contents to see if a public domain notice is in there. As you can imagine, this isn't 100% definitive, but can be useful if a license match isn't found.
func (*License) MultipleMatch ¶
func (c *License) MultipleMatch(contents string, includeHeaders bool) stringclassifier.Matches
MultipleMatch matches all licenses within an unknown text.
func (*License) NearestMatch ¶
func (c *License) NearestMatch(contents string) *stringclassifier.Match
NearestMatch returns the "nearest" match to the given set of known licenses. The returned match contains the name of the license and a confidence percentage indicating how confident the classifier is in the result.
func (*License) WithinConfidenceThreshold ¶
WithinConfidenceThreshold returns true if the confidence value is above or equal to the confidence threshold.
Directories ¶
Path | Synopsis |
---|---|
internal | |
commentparser | Package commentparser does a basic parse over a source file and returns all of the comments from the code. |
commentparser/language | Package language contains methods and information about the different programming languages the comment parser supports. |
sets | Package sets provides sets for storing collections of unique elements. |
serializer | Package serializer normalizes the license text and calculates the hash values for all substrings in the license. |
identify_license | The identify_license program tries to identify the license type of an unknown license. |