lex.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // based on the lexer from: src/pkg/text/template/parse/lex.go (golang source)
  5. package pkgbuild
  6. import (
  7. "fmt"
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. )
// pos is a byte offset into the input string being scanned.
type pos int
// item is a single token produced by the lexer and delivered on its
// items channel.
type item struct {
	typ itemType // kind of token
	pos pos      // byte offset in the input where the token starts
	val string   // token text, or the formatted message for itemError
}
  19. func (i item) String() string {
  20. switch {
  21. case i.typ == itemEOF:
  22. return "EOF"
  23. case i.typ == itemError:
  24. return i.val
  25. case len(i.val) > 10:
  26. return fmt.Sprintf("%.10q...", i.val)
  27. }
  28. return fmt.Sprintf("%q", i.val)
  29. }
// itemType identifies the kind of an item emitted by the lexer.
type itemType int
// Item kinds. The values are assigned by iota, so itemError is the zero
// value of itemType and the declaration order must not change.
const (
	itemError    itemType = iota // an error occurred; the item's val holds the message
	itemEOF                      // end of input
	itemVariable                 // generic variable; no state function shown here emits it
	itemValue                    // the text following a variable's '=' up to the end of line
	itemEndSplit                 // two consecutive newlines
	// PKGBUILD variables
	itemPkgname      // pkgname variable
	itemPkgver       // pkgver variable
	itemPkgrel       // pkgrel variable
	itemPkgdir       // pkgdir variable
	itemEpoch        // epoch variable
	itemPkgbase      // pkgbase variable
	itemPkgdesc      // pkgdesc variable
	itemArch         // arch variable
	itemURL          // url variable
	itemLicense      // license variable
	itemGroups       // groups variable
	itemDepends      // depends variable
	itemOptdepends   // optdepends variable
	itemMakedepends  // makedepends variable
	itemCheckdepends // checkdepends variable
	itemProvides     // provides variable
	itemConflicts    // conflicts variable
	itemReplaces     // replaces variable
	itemBackup       // backup variable
	itemOptions      // options variable
	itemInstall      // install variable
	itemChangelog    // changelog variable
	itemSource       // source variable
	itemNoextract    // noextract variable
	itemMd5sums      // md5sums variable
	itemSha1sums     // sha1sums variable
	itemSha224sums   // sha224sums variable
	itemSha256sums   // sha256sums variable
	itemSha384sums   // sha384sums variable
	itemSha512sums   // sha512sums variable
	itemValidpgpkeys // validpgpkeys variable
)
// variables maps each recognized PKGBUILD variable name to the item type
// emitted for it. lexVariable looks names up here (after stripping any
// "_<suffix>" part) and reports names that are missing as errors.
var variables = map[string]itemType{
	"pkgname":      itemPkgname,
	"pkgver":       itemPkgver,
	"pkgrel":       itemPkgrel,
	"pkgdir":       itemPkgdir,
	"epoch":        itemEpoch,
	"pkgbase":      itemPkgbase,
	"pkgdesc":      itemPkgdesc,
	"arch":         itemArch,
	"url":          itemURL,
	"license":      itemLicense,
	"groups":       itemGroups,
	"depends":      itemDepends,
	"optdepends":   itemOptdepends,
	"makedepends":  itemMakedepends,
	"checkdepends": itemCheckdepends,
	"provides":     itemProvides,
	"conflicts":    itemConflicts,
	"replaces":     itemReplaces,
	"backup":       itemBackup,
	"options":      itemOptions,
	"install":      itemInstall,
	"changelog":    itemChangelog,
	"source":       itemSource,
	"noextract":    itemNoextract,
	"md5sums":      itemMd5sums,
	"sha1sums":     itemSha1sums,
	"sha224sums":   itemSha224sums,
	"sha256sums":   itemSha256sums,
	"sha384sums":   itemSha384sums,
	"sha512sums":   itemSha512sums,
	"validpgpkeys": itemValidpgpkeys,
}
// eof is the sentinel rune value that next returns once the input is
// exhausted.
const eof = -1
// stateFn represents the state of the scanner as a function that returns
// the next state. A nil stateFn ends the loop in run.
type stateFn func(*lexer) stateFn
// lexer holds the state of the scanner.
type lexer struct {
	input   string    // the string being scanned
	state   stateFn   // the current state function, driven by run
	pos     pos       // current byte offset in input
	start   pos       // byte offset where the pending (not yet emitted) text begins
	width   pos       // byte width of the last rune read by next; 0 at end of input
	lastPos pos       // position of the most recent item returned by nextItem
	items   chan item // channel of scanned items
}
  117. // next returns the next rune in the input
  118. func (l *lexer) next() rune {
  119. if int(l.pos) >= len(l.input) {
  120. l.width = 0
  121. return eof
  122. }
  123. r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  124. l.width = pos(w)
  125. l.pos += l.width
  126. return r
  127. }
  128. // peek returns but does not consume the next rune in the input
  129. func (l *lexer) peek() rune {
  130. r := l.next()
  131. l.backup()
  132. return r
  133. }
  134. // backup steps back one rune. Can only be called once per call of next
  135. func (l *lexer) backup() {
  136. l.pos -= l.width
  137. }
  138. // emit passes an item back to the client
  139. func (l *lexer) emit(t itemType) {
  140. l.items <- item{t, l.start, l.input[l.start:l.pos]}
  141. l.start = l.pos
  142. }
// ignore skips over the pending input before this point by moving start
// up to the current position, so the skipped text is never emitted.
func (l *lexer) ignore() {
	l.start = l.pos
}
  147. // errorf returns an error token and terminates the scan by passing
  148. // back a nil pointer that will be the next state, terminating l.nextItem.
  149. func (l *lexer) errorf(format string, args ...interface{}) stateFn {
  150. l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
  151. return nil
  152. }
  153. // nextItem returns the next item from the input.
  154. func (l *lexer) nextItem() item {
  155. item := <-l.items
  156. l.lastPos = item.pos
  157. return item
  158. }
  159. func lex(input string) *lexer {
  160. l := &lexer{
  161. input: input,
  162. items: make(chan item),
  163. }
  164. go l.run()
  165. return l
  166. }
  167. func (l *lexer) run() {
  168. for l.state = lexEnv; l.state != nil; {
  169. l.state = l.state(l)
  170. }
  171. }
  172. func lexEnv(l *lexer) stateFn {
  173. var r rune
  174. for {
  175. switch r = l.next(); {
  176. case r == eof:
  177. l.emit(itemEOF)
  178. return nil
  179. case isAlphaNumericUnderscore(r):
  180. return lexVariable
  181. case r == '\n':
  182. buffer := l.input[l.start:l.pos]
  183. if buffer == "\n" {
  184. if l.peek() == '\n' {
  185. l.next()
  186. l.emit(itemEndSplit)
  187. }
  188. l.ignore()
  189. }
  190. case r == '\t':
  191. l.ignore()
  192. case r == ' ':
  193. l.ignore()
  194. case r == '#':
  195. return lexComment
  196. default:
  197. l.errorf("unable to parse character: %c", r)
  198. }
  199. }
  200. }
  201. func lexComment(l *lexer) stateFn {
  202. for {
  203. switch l.next() {
  204. case '\n':
  205. l.ignore()
  206. return lexEnv
  207. case eof:
  208. l.emit(itemEOF)
  209. return nil
  210. }
  211. }
  212. }
  213. func lexVariable(l *lexer) stateFn {
  214. for {
  215. switch r := l.next(); {
  216. case isAlphaNumericUnderscore(r):
  217. // absorb
  218. case r == ' ' && l.peek() == '=':
  219. l.backup()
  220. variable := l.input[l.start:l.pos]
  221. // strip arch from source_arch like constructs
  222. witharch := strings.SplitN(variable, "_", 2)
  223. if len(witharch) == 2 {
  224. variable = witharch[0]
  225. }
  226. if _, ok := variables[variable]; ok {
  227. l.emit(variables[variable])
  228. // TODO to cut off ' = '
  229. l.next()
  230. l.next()
  231. l.next()
  232. l.ignore()
  233. return lexValue
  234. }
  235. return l.errorf("invalid variable: %s", variable)
  236. default:
  237. pattern := l.input[l.start:l.pos]
  238. return l.errorf("invalid pattern: %s", pattern)
  239. }
  240. }
  241. }
  242. func lexValue(l *lexer) stateFn {
  243. for {
  244. switch l.next() {
  245. case '\n':
  246. l.backup()
  247. l.emit(itemValue)
  248. return lexEnv
  249. }
  250. }
  251. }
  252. // isAlphaNumericUnderscore reports whether r is an alphabetic, digit, or underscore.
  253. func isAlphaNumericUnderscore(r rune) bool {
  254. return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
  255. }