spdxcheck.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #!/usr/bin/env python
  2. # SPDX-License-Identifier: GPL-2.0
  3. # Copyright Thomas Gleixner <tglx@linutronix.de>
  4. from argparse import ArgumentParser
  5. from ply import lex, yacc
  6. import traceback
  7. import sys
  8. import git
  9. import re
  10. import os
  class ParserException(Exception):
      """Problem found while lexing or parsing an SPDX license expression.

      Attributes:
          tok: the token the problem was detected at; None is passed when
               the expression ended prematurely.
          txt: short human readable description of the problem.
      """

      def __init__(self, tok, txt):
          self.tok, self.txt = tok, txt
  class SPDXException(Exception):
      """Problem found while reading the license data in LICENSES/.

      Attributes:
          el:  the git tree element of the offending file, or None when the
               problem is not reported against a specific file.
          txt: short human readable description of the problem.
      """

      def __init__(self, el, txt):
          self.el, self.txt = el, txt
  class SPDXdata(object):
      """Container for the license information gathered from LICENSES/."""

      def __init__(self):
          # Upper-cased license IDs
          self.licenses = list()
          # Upper-cased exception ID -> list of license IDs it applies to
          self.exceptions = dict()
          # Number of license / exception files seen (statistics only)
          self.license_files = 0
          self.exception_files = 0
  # Read the spdx data from the LICENSES directory
  def read_spdxdata(repo):
      """Collect the valid license and exception IDs from LICENSES/.

      The list of files is taken from the git tree of the current HEAD; the
      file contents are read from the working directory, so this must run
      from the top of the source tree.

      Args:
          repo: git.Repo object of the source tree.

      Returns:
          A populated SPDXdata instance.

      Raises:
          SPDXException: on a duplicate license ID, an exception that
              references an unknown license, or an exception file without
              any SPDX-Licenses entry.
      """
      # The subdirectories of LICENSES in the kernel source.
      # "exceptions" has to be processed last because every license an
      # exception refers to must already be known.
      license_dirs = [ "preferred", "other", "exceptions" ]

      # Use HEAD rather than a hard-coded 'master' branch, which need not
      # exist, to stay consistent with the tree that gets scanned.
      lictree = repo.head.commit.tree['LICENSES']

      spdx = SPDXdata()

      for d in license_dirs:
          for el in lictree[d].traverse():
              if not os.path.isfile(el.path):
                  continue

              exception = None
              # Context manager so the file is closed even when bailing out
              # early via break or an exception.
              with open(el.path) as fd:
                  for l in fd:
                      if l.startswith('Valid-License-Identifier:'):
                          lid = l.split(':')[1].strip().upper()
                          if lid in spdx.licenses:
                              raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                          spdx.licenses.append(lid)

                      elif l.startswith('SPDX-Exception-Identifier:'):
                          exception = l.split(':')[1].strip().upper()
                          spdx.exceptions[exception] = []

                      elif l.startswith('SPDX-Licenses:'):
                          # Comma separated list; drop all blanks and tabs
                          lics = l.split(':')[1].upper().strip()
                          lics = lics.replace(' ', '').replace('\t', '')
                          for lic in lics.split(','):
                              if lic not in spdx.licenses:
                                  raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                              spdx.exceptions[exception].append(lic)

                      elif l.startswith("License-Text:"):
                          if exception:
                              if not len(spdx.exceptions[exception]):
                                  raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                              spdx.exception_files += 1
                          else:
                              spdx.license_files += 1
                          # Everything past this tag is the license text
                          break
      return spdx
  class id_parser(object):
      """Lexer and parser (built with ply) for SPDX license expressions.

      Besides checking the syntax of an expression, every license and
      exception ID is validated against the data handed in via spdx.

      The instance keeps statistics counters: checked (files),
      lines_checked, spdx_valid and spdx_errors.

      NOTE: the docstrings of the t_*() and p_*() methods are the token
      regular expressions and grammar rules consumed by ply. They must not
      be turned into ordinary documentation.
      """

      reserved = [ 'AND', 'OR', 'WITH' ]
      tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

      precedence = ( ('nonassoc', 'AND', 'OR'), )

      t_ignore = ' \t'

      def __init__(self, spdx):
          """Build lexer and parser and reset the statistics counters.

          Args:
              spdx: object providing licenses (container of license IDs) and
                    exceptions (dict: exception ID -> list of license IDs).
          """
          self.spdx = spdx
          self.lasttok = None
          self.lastid = None
          self.lexer = lex.lex(module = self, reflags = re.UNICODE)
          # Initialize the parser. No debug file and no parser rules stored on disk
          # The rules are small enough to be generated on the fly
          self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
          self.lines_checked = 0
          self.checked = 0
          self.spdx_valid = 0
          self.spdx_errors = 0
          self.curline = 0
          self.deepest = 0

      # Validate License and Exception IDs
      def validate(self, tok):
          """Check a token against the known license and exception IDs.

          Remembers the last license ID seen so that a following exception
          can be checked for being applicable to that license.

          Raises:
              ParserException: unknown license ID, unknown exception ID, or
                  exception which is not valid for the preceding license.
          """
          ident = tok.value.upper()
          if tok.type == 'ID':
              if ident not in self.spdx.licenses:
                  raise ParserException(tok, 'Invalid License ID')
              self.lastid = ident
          elif tok.type == 'EXC':
              # 'in' instead of dict.has_key(), which is gone in Python 3
              if ident not in self.spdx.exceptions:
                  raise ParserException(tok, 'Invalid Exception ID')
              if self.lastid not in self.spdx.exceptions[ident]:
                  raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
              self.lastid = None
          elif tok.type != 'WITH':
              # WITH sits between license and exception, so it must not
              # drop the remembered license ID. Everything else does.
              self.lastid = None

      # Lexer functions
      def t_RPAR(self, tok):
          r'\)'
          self.lasttok = tok.type
          return tok

      def t_LPAR(self, tok):
          r'\('
          self.lasttok = tok.type
          return tok

      def t_ID(self, tok):
          r'[A-Za-z.0-9\-+]+'

          # An ID directly following an exception is not allowed
          if self.lasttok == 'EXC':
              print(tok)
              raise ParserException(tok, 'Missing parentheses')

          tok.value = tok.value.strip()
          val = tok.value.upper()

          # The regex matches operators as well. Retype them, and treat
          # whatever follows WITH as an exception ID.
          if val in self.reserved:
              tok.type = val
          elif self.lasttok == 'WITH':
              tok.type = 'EXC'

          self.lasttok = tok.type
          self.validate(tok)
          return tok

      def t_error(self, tok):
          raise ParserException(tok, 'Invalid token')

      def p_expr(self, p):
          '''expr : ID
                  | ID WITH EXC
                  | expr AND expr
                  | expr OR expr
                  | LPAR expr RPAR'''
          pass

      def p_error(self, p):
          # ply hands in None when the input ended in the middle of a rule
          if not p:
              raise ParserException(None, 'Unfinished license expression')
          else:
              raise ParserException(p, 'Syntax error')

      def parse(self, expr):
          """Parse one license expression string.

          Raises:
              ParserException: if the expression is invalid.
          """
          self.lasttok = None
          self.lastid = None
          self.parser.parse(expr, lexer = self.lexer)

      def parse_lines(self, fd, maxlines, fname):
          """Look for an SPDX identifier in the first lines of a file.

          Parsing stops after the first line containing the SPDX tag. Errors
          are written to stdout as 'file: line:column message' and counted in
          spdx_errors; nothing is raised to the caller for invalid
          expressions.

          Args:
              fd:       iterable yielding the text lines of the file.
              maxlines: maximum number of lines to inspect.
              fname:    file name used in error reports.
          """
          self.checked += 1
          self.curline = 0
          try:
              for line in fd:
                  self.curline += 1
                  if self.curline > maxlines:
                      break
                  self.lines_checked += 1
                  if line.find("SPDX-License-Identifier:") < 0:
                      continue
                  # Expression is the text after the tag's colon, minus a
                  # trailing C comment terminator
                  expr = line.split(':')[1].replace('*/', '').strip()
                  self.parse(expr)

                  self.spdx_valid += 1
                  #
                  # Should we check for more SPDX ids in the same file and
                  # complain if there are any?
                  #
                  break

          except ParserException as pe:
              if pe.tok:
                  col = line.find(expr) + pe.tok.lexpos
                  tok = pe.tok.value
                  sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
              else:
                  # No token, hence no column information available. The
                  # format string takes exactly three arguments.
                  sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
              self.spdx_errors += 1
  def scan_git_tree(tree):
      """Check every file below a git tree for a valid SPDX identifier.

      Relies on the module level globals 'parser' (the id_parser instance)
      and 'args' (parsed command line) set up by the script's main section.
      The files are read from the working directory, not from git.

      Args:
          tree: git tree object whose traverse() yields elements with a
                'path' attribute.
      """
      for el in tree.traverse():
          # Exclude stuff which would make pointless noise
          # FIXME: Put this somewhere more sensible
          if el.path.startswith("LICENSES"):
              continue
          if el.path.find("license-rules.rst") >= 0:
              continue
          if el.path == 'scripts/checkpatch.pl':
              continue
          # Skips directories and files missing from the working directory
          if not os.path.isfile(el.path):
              continue
          # Close each file right away. A full tree scan would otherwise
          # leave the cleanup of all descriptors to the garbage collector.
          with open(el.path) as fd:
              parser.parse_lines(fd, args.maxlines, el.path)
  def scan_git_subtree(tree, path):
      """Scan only the part of a git tree located below path.

      Args:
          tree: git tree object to start from.
          path: '/' separated directory path relative to tree; leading and
                trailing slashes are ignored.
      """
      components = path.strip('/').split('/')
      subtree = tree
      # Descend one path component at a time
      for name in components:
          subtree = subtree[name]
      scan_git_tree(subtree)
  if __name__ == '__main__':
      # Command line driver: read the license data from LICENSES/, then check
      # stdin, the given files/directories or the complete git tree.
      # 'args' and 'parser' are module globals used by scan_git_tree().

      ap = ArgumentParser(description='SPDX expression checker')
      ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
      ap.add_argument('-m', '--maxlines', type=int, default=15,
                      help='Maximum number of lines to scan in a file. Default 15')
      ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
      args = ap.parse_args()

      # Sanity check path arguments
      if '-' in args.path and len(args.path) > 1:
          sys.stderr.write('stdin input "-" must be the only path argument\n')
          sys.exit(1)

      try:
          # Use git to get the valid license expressions
          repo = git.Repo(os.getcwd())
          assert not repo.bare

          # Initialize SPDX data
          spdx = read_spdxdata(repo)

          # Initialize the parser
          parser = id_parser(spdx)

      except SPDXException as se:
          if se.el:
              sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
          else:
              sys.stderr.write('%s\n' %se.txt)
          sys.exit(1)

      except Exception as ex:
          sys.stderr.write('FAIL: %s\n' %ex)
          sys.stderr.write('%s\n' %traceback.format_exc())
          sys.exit(1)

      try:
          if len(args.path) and args.path[0] == '-':
              parser.parse_lines(sys.stdin, args.maxlines, '-')
          else:
              if args.path:
                  for p in args.path:
                      if os.path.isfile(p):
                          # Close the file as soon as it has been checked
                          with open(p) as fd:
                              parser.parse_lines(fd, args.maxlines, p)
                      elif os.path.isdir(p):
                          scan_git_subtree(repo.head.reference.commit.tree, p)
                      else:
                          sys.stderr.write('path %s does not exist\n' %p)
                          sys.exit(1)
              else:
                  # Full git tree scan
                  scan_git_tree(repo.head.commit.tree)

              if args.verbose:
                  sys.stderr.write('\n')
                  sys.stderr.write('License files: %12d\n' %spdx.license_files)
                  sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
                  sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
                  sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
                  sys.stderr.write('\n')
                  sys.stderr.write('Files checked: %12d\n' %parser.checked)
                  sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
                  sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
                  sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)

          sys.exit(0)

      # sys.exit() raises SystemExit, which is not derived from Exception
      # and therefore passes through this handler untouched.
      except Exception as ex:
          sys.stderr.write('FAIL: %s\n' %ex)
          sys.stderr.write('%s\n' %traceback.format_exc())
          sys.exit(1)