config.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. package configs
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "fmt"
  6. "os/exec"
  7. "time"
  8. "github.com/Sirupsen/logrus"
  9. )
// Rlimit describes one resource limit entry of a container configuration.
// Config.Rlimits holds a list of these; when that list is empty the container
// inherits its limits from the parent process.
type Rlimit struct {
	// Type selects which resource is limited.
	// NOTE(review): presumably an RLIMIT_* constant value — confirm with the consumer.
	Type int `json:"type"`
	// Hard is the hard limit for the resource.
	Hard uint64 `json:"hard"`
	// Soft is the soft limit for the resource.
	Soft uint64 `json:"soft"`
}
// IDMap represents a single UID or GID mapping for user namespaces.
// Config.UidMappings and Config.GidMappings are lists of these entries.
type IDMap struct {
	// ContainerID is the ID as seen inside the container.
	// NOTE(review): presumably the first ID of a contiguous range — confirm.
	ContainerID int `json:"container_id"`
	// HostID is the corresponding ID on the host.
	HostID int `json:"host_id"`
	// Size is the size of the mapping.
	// NOTE(review): presumably the number of consecutive IDs mapped — confirm.
	Size int `json:"size"`
}
// Seccomp represents syscall restrictions.
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
type Seccomp struct {
	// DefaultAction is the action taken when no rule in Syscalls matches.
	DefaultAction Action `json:"default_action"`
	// Architectures lists additional architectures allowed for syscalls.
	Architectures []string `json:"architectures"`
	// Syscalls is the list of per-syscall rules.
	Syscalls []*Syscall `json:"syscalls"`
}
// Action is an action to be taken upon rule match in Seccomp.
type Action int

// The available seccomp actions. Numbering starts at 1, so the zero value of
// Action does not correspond to any of these constants.
const (
	Kill Action = iota + 1
	Errno
	Trap
	Allow
	Trace
)
// Operator is a comparison operator to be used when matching syscall
// arguments in Seccomp.
type Operator int

// The available comparison operators. Numbering starts at 1, so the zero
// value of Operator does not correspond to any of these constants.
const (
	EqualTo Operator = iota + 1
	NotEqualTo
	GreaterThan
	GreaterThanOrEqualTo
	LessThan
	LessThanOrEqualTo
	MaskEqualTo
)
// Arg is a rule to match a specific syscall argument in Seccomp.
type Arg struct {
	// Index identifies which argument of the syscall is examined.
	Index uint `json:"index"`
	// Value is the value the argument is compared against using Op.
	Value uint64 `json:"value"`
	// ValueTwo is a second comparison value.
	// NOTE(review): presumably only meaningful for MaskEqualTo — confirm.
	ValueTwo uint64 `json:"value_two"`
	// Op is the comparison operator applied to the argument.
	Op Operator `json:"op"`
}
// Syscall is a rule to match a syscall in Seccomp.
type Syscall struct {
	// Name is the name of the syscall the rule applies to.
	Name string `json:"name"`
	// Action is the action taken when the rule matches.
	Action Action `json:"action"`
	// Args optionally restricts the rule to calls whose arguments match.
	Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.

// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs.
	// This is a common option when the container is running in ramdisk.
	NoPivotRoot bool `json:"no_pivot_root"`

	// ParentDeathSignal specifies the signal that is sent to the container's process in the case
	// that the parent process dies.
	ParentDeathSignal int `json:"parent_death_signal"`

	// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
	// When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writable.
	// This is required when using read-only root filesystems. In these cases, a read/writable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
	PivotDir string `json:"pivot_dir"`

	// Rootfs is the path to a directory containing the container's root filesystem.
	Rootfs string `json:"rootfs"`

	// Readonlyfs will remount the container's rootfs as read-only, where only externally mounted
	// bind mounts are writable.
	Readonlyfs bool `json:"readonlyfs"`

	// RootPropagation specifies the mount propagation flags to be applied to /.
	RootPropagation int `json:"rootPropagation"`

	// Mounts specify additional source and destination paths that will be mounted inside the container's
	// rootfs and mount namespace if specified.
	Mounts []*Mount `json:"mounts"`

	// Devices are the device nodes that should be automatically created within the container upon container start.
	// Note, make sure that the node is marked as allowed in the cgroup as well!
	Devices []*Device `json:"devices"`

	// MountLabel is serialized as "mount_label".
	// NOTE(review): presumably the security (e.g. SELinux) label applied to the container's mounts — confirm.
	MountLabel string `json:"mount_label"`

	// Hostname optionally sets the container's hostname if provided.
	Hostname string `json:"hostname"`

	// Namespaces specifies the container's namespaces that it should set up when cloning the init process.
	// If a namespace is not provided, that namespace is shared from the container's parent process.
	Namespaces Namespaces `json:"namespaces"`

	// Capabilities specify the capabilities to keep when executing the process inside the container.
	// All capabilities not specified will be dropped from the process's capability mask.
	Capabilities []string `json:"capabilities"`

	// Networks specifies the container's network setup to be created.
	Networks []*Network `json:"networks"`

	// Routes can be specified to create entries in the route table as the container is started.
	Routes []*Route `json:"routes"`

	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
	// placed into to limit the resources the container has available.
	Cgroups *Cgroup `json:"cgroups"`

	// AppArmorProfile specifies the profile to apply to the process running in the container and is
	// changed at the time the process is execed.
	AppArmorProfile string `json:"apparmor_profile,omitempty"`

	// ProcessLabel specifies the label to apply to the process running in the container. It is
	// commonly used by selinux.
	ProcessLabel string `json:"process_label,omitempty"`

	// Rlimits specifies the resource limits, such as max open files, to set in the container.
	// If Rlimits are not set, the container will inherit rlimits from the parent process.
	Rlimits []Rlimit `json:"rlimits,omitempty"`

	// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
	// for a process. Valid values are in the range [-1000, 1000], where processes with
	// higher scores are preferred for being killed.
	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
	OomScoreAdj int `json:"oom_score_adj"`

	// AdditionalGroups specifies the gids that should be added to supplementary groups
	// in addition to those that the user belongs to.
	AdditionalGroups []string `json:"additional_groups"`

	// UidMappings is an array of User ID mappings for User Namespaces.
	UidMappings []IDMap `json:"uid_mappings"`

	// GidMappings is an array of Group ID mappings for User Namespaces.
	GidMappings []IDMap `json:"gid_mappings"`

	// MaskPaths specifies paths within the container's rootfs to mask over with a bind
	// mount pointing to /dev/null so as to prevent reads of the file.
	MaskPaths []string `json:"mask_paths"`

	// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
	// so that these files prevent any writes.
	ReadonlyPaths []string `json:"readonly_paths"`

	// Sysctl is a map of properties and their values. It is the equivalent of using
	// sysctl -w my.property.name value in Linux.
	Sysctl map[string]string `json:"sysctl"`

	// Seccomp allows actions to be taken whenever a syscall is made within the container.
	// A number of rules are given, each having an action to be taken if a syscall matches it.
	// A default action to be taken if no rules match is also given.
	Seccomp *Seccomp `json:"seccomp"`

	// NoNewPrivileges controls whether processes in the container can gain additional privileges.
	NoNewPrivileges bool `json:"no_new_privileges,omitempty"`

	// Hooks are a collection of actions to perform at various container lifecycle events.
	// CommandHooks are serialized to JSON, but other hooks are not.
	Hooks *Hooks

	// Version is the version of opencontainer specification that is supported.
	Version string `json:"version"`

	// Labels are user defined metadata that is stored in the config and populated on the state.
	Labels []string `json:"labels"`
}
// Hooks groups the hooks to run at the different points of a container's
// lifecycle. Its JSON form is produced and consumed by the custom
// MarshalJSON and UnmarshalJSON methods, which only handle CommandHook values.
type Hooks struct {
	// Prestart commands are executed after the container namespaces are created,
	// but before the user supplied command is executed from init.
	Prestart []Hook

	// Poststart commands are executed after the container init process starts.
	Poststart []Hook

	// Poststop commands are executed after the container init process exits.
	Poststop []Hook
}
  158. func (hooks *Hooks) UnmarshalJSON(b []byte) error {
  159. var state struct {
  160. Prestart []CommandHook
  161. Poststart []CommandHook
  162. Poststop []CommandHook
  163. }
  164. if err := json.Unmarshal(b, &state); err != nil {
  165. return err
  166. }
  167. deserialize := func(shooks []CommandHook) (hooks []Hook) {
  168. for _, shook := range shooks {
  169. hooks = append(hooks, shook)
  170. }
  171. return hooks
  172. }
  173. hooks.Prestart = deserialize(state.Prestart)
  174. hooks.Poststart = deserialize(state.Poststart)
  175. hooks.Poststop = deserialize(state.Poststop)
  176. return nil
  177. }
  178. func (hooks Hooks) MarshalJSON() ([]byte, error) {
  179. serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
  180. for _, hook := range hooks {
  181. switch chook := hook.(type) {
  182. case CommandHook:
  183. serializableHooks = append(serializableHooks, chook)
  184. default:
  185. logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
  186. }
  187. }
  188. return serializableHooks
  189. }
  190. return json.Marshal(map[string]interface{}{
  191. "prestart": serialize(hooks.Prestart),
  192. "poststart": serialize(hooks.Poststart),
  193. "poststop": serialize(hooks.Poststop),
  194. })
  195. }
// HookState is the payload provided to a hook on execution. Command hooks
// receive it JSON-encoded on their standard input.
type HookState struct {
	// Version is serialized as "ociVersion".
	Version string `json:"ociVersion"`
	// ID is serialized as "id".
	// NOTE(review): presumably the container ID — confirm with the caller.
	ID string `json:"id"`
	// Pid is serialized as "pid".
	// NOTE(review): presumably the container init process ID — confirm.
	Pid int `json:"pid"`
	// Root is serialized as "root".
	Root string `json:"root"`
	// BundlePath is serialized as "bundlePath".
	BundlePath string `json:"bundlePath"`
}
// Hook is an action that can be run at a container lifecycle event.
// FuncHook and CommandHook are the implementations provided by this file.
type Hook interface {
	// Run executes the hook with the provided state.
	Run(HookState) error
}
  208. // NewFunctionHooks will call the provided function when the hook is run.
  209. func NewFunctionHook(f func(HookState) error) FuncHook {
  210. return FuncHook{
  211. run: f,
  212. }
  213. }
// FuncHook is a Hook backed by an in-process Go function. Its only field is
// unexported, so values are created with NewFunctionHook. Hooks.MarshalJSON
// skips FuncHook values because only CommandHook can be serialized.
type FuncHook struct {
	// run is the function invoked by Run.
	run func(HookState) error
}
  217. func (f FuncHook) Run(s HookState) error {
  218. return f.run(s)
  219. }
// Command describes an external program to execute as a hook.
type Command struct {
	// Path is the path of the program; Run passes it to exec.Cmd.Path.
	Path string `json:"path"`
	// Args is the argument list; Run passes it to exec.Cmd.Args unchanged.
	Args []string `json:"args"`
	// Env is the environment; Run passes it to exec.Cmd.Env unchanged.
	Env []string `json:"env"`
	// Dir is serialized as "dir".
	// NOTE(review): Run in this file does not apply Dir to the child process — confirm whether that is intended.
	Dir string `json:"dir"`
	// Timeout optionally bounds how long Run waits for the program.
	// A nil Timeout means Run waits until the program exits.
	Timeout *time.Duration `json:"timeout"`
}
  227. // NewCommandHooks will execute the provided command when the hook is run.
  228. func NewCommandHook(cmd Command) CommandHook {
  229. return CommandHook{
  230. Command: cmd,
  231. }
  232. }
// CommandHook is a Hook that runs an external command. It embeds Command and
// therefore gets its Run method by promotion. It is the only hook type that
// Hooks.MarshalJSON and Hooks.UnmarshalJSON serialize.
type CommandHook struct {
	Command
}
  236. func (c Command) Run(s HookState) error {
  237. b, err := json.Marshal(s)
  238. if err != nil {
  239. return err
  240. }
  241. cmd := exec.Cmd{
  242. Path: c.Path,
  243. Args: c.Args,
  244. Env: c.Env,
  245. Stdin: bytes.NewReader(b),
  246. }
  247. errC := make(chan error, 1)
  248. go func() {
  249. out, err := cmd.CombinedOutput()
  250. if err != nil {
  251. err = fmt.Errorf("%s: %s", err, out)
  252. }
  253. errC <- err
  254. }()
  255. if c.Timeout != nil {
  256. select {
  257. case err := <-errC:
  258. return err
  259. case <-time.After(*c.Timeout):
  260. cmd.Process.Kill()
  261. cmd.Wait()
  262. return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
  263. }
  264. }
  265. return <-errC
  266. }