init_linux.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. // +build linux
  2. package libcontainer
  3. import (
  4. "encoding/json"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "net"
  9. "os"
  10. "strconv"
  11. "strings"
  12. "syscall"
  13. "github.com/Sirupsen/logrus"
  14. "github.com/opencontainers/runc/libcontainer/cgroups"
  15. "github.com/opencontainers/runc/libcontainer/configs"
  16. "github.com/opencontainers/runc/libcontainer/system"
  17. "github.com/opencontainers/runc/libcontainer/user"
  18. "github.com/opencontainers/runc/libcontainer/utils"
  19. "github.com/vishvananda/netlink"
  20. )
  // initType names the flavour of init that newContainerInit should build.
  type initType string

  const (
  	// initSetns makes newContainerInit return a linuxSetnsInit.
  	initSetns = initType("setns")
  	// initStandard makes newContainerInit return a linuxStandardInit.
  	initStandard = initType("standard")
  )
  // pid is the JSON message shape that carries a single process ID under the
  // "pid" key.
  type pid struct {
  	Pid int `json:"pid"`
  }
  29. // network is an internal struct used to setup container networks.
  30. type network struct {
  31. configs.Network
  32. // TempVethPeerName is a unique temporary veth peer name that was placed into
  33. // the container's namespace.
  34. TempVethPeerName string `json:"temp_veth_peer_name"`
  35. }
  36. // initConfig is used for transferring parameters from Exec() to Init()
  37. type initConfig struct {
  38. Args []string `json:"args"`
  39. Env []string `json:"env"`
  40. Cwd string `json:"cwd"`
  41. Capabilities []string `json:"capabilities"`
  42. ProcessLabel string `json:"process_label"`
  43. AppArmorProfile string `json:"apparmor_profile"`
  44. NoNewPrivileges bool `json:"no_new_privileges"`
  45. User string `json:"user"`
  46. Config *configs.Config `json:"config"`
  47. Console string `json:"console"`
  48. Networks []*network `json:"network"`
  49. PassedFilesCount int `json:"passed_files_count"`
  50. ContainerId string `json:"containerid"`
  51. Rlimits []configs.Rlimit `json:"rlimits"`
  52. }
  // initer is the common interface of the init implementations returned by
  // newContainerInit.
  type initer interface {
  	// Init runs the init sequence and reports any failure.
  	Init() error
  }
  56. func newContainerInit(t initType, pipe *os.File) (initer, error) {
  57. var config *initConfig
  58. if err := json.NewDecoder(pipe).Decode(&config); err != nil {
  59. return nil, err
  60. }
  61. if err := populateProcessEnvironment(config.Env); err != nil {
  62. return nil, err
  63. }
  64. switch t {
  65. case initSetns:
  66. return &linuxSetnsInit{
  67. config: config,
  68. }, nil
  69. case initStandard:
  70. return &linuxStandardInit{
  71. pipe: pipe,
  72. parentPid: syscall.Getppid(),
  73. config: config,
  74. }, nil
  75. }
  76. return nil, fmt.Errorf("unknown init type %q", t)
  77. }
  // populateProcessEnvironment exports every "KEY=VALUE" entry of env into the
  // current process's environment. Only the first '=' separates key from
  // value, so values may themselves contain '='. Processing stops at the first
  // entry that has no '=' or that os.Setenv rejects; entries before it remain
  // set.
  func populateProcessEnvironment(env []string) error {
  	for i := range env {
  		entry := env[i]
  		sep := strings.IndexByte(entry, '=')
  		if sep < 0 {
  			return fmt.Errorf("invalid environment '%v'", entry)
  		}
  		key, value := entry[:sep], entry[sep+1:]
  		if err := os.Setenv(key, value); err != nil {
  			return err
  		}
  	}
  	return nil
  }
  92. // finalizeNamespace drops the caps, sets the correct user
  93. // and working dir, and closes any leaked file descriptors
  94. // before executing the command inside the namespace
  95. func finalizeNamespace(config *initConfig) error {
  96. // Ensure that all unwanted fds we may have accidentally
  97. // inherited are marked close-on-exec so they stay out of the
  98. // container
  99. if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
  100. return err
  101. }
  102. capabilities := config.Config.Capabilities
  103. if config.Capabilities != nil {
  104. capabilities = config.Capabilities
  105. }
  106. w, err := newCapWhitelist(capabilities)
  107. if err != nil {
  108. return err
  109. }
  110. // drop capabilities in bounding set before changing user
  111. if err := w.dropBoundingSet(); err != nil {
  112. return err
  113. }
  114. // preserve existing capabilities while we change users
  115. if err := system.SetKeepCaps(); err != nil {
  116. return err
  117. }
  118. if err := setupUser(config); err != nil {
  119. return err
  120. }
  121. if err := system.ClearKeepCaps(); err != nil {
  122. return err
  123. }
  124. // drop all other capabilities
  125. if err := w.drop(); err != nil {
  126. return err
  127. }
  128. if config.Cwd != "" {
  129. if err := syscall.Chdir(config.Cwd); err != nil {
  130. return err
  131. }
  132. }
  133. return nil
  134. }
  135. // syncParentReady sends to the given pipe a JSON payload which indicates that
  136. // the init is ready to Exec the child process. It then waits for the parent to
  137. // indicate that it is cleared to Exec.
  138. func syncParentReady(pipe io.ReadWriter) error {
  139. // Tell parent.
  140. if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
  141. return err
  142. }
  143. // Wait for parent to give the all-clear.
  144. var procSync syncT
  145. if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
  146. if err == io.EOF {
  147. return fmt.Errorf("parent closed synchronisation channel")
  148. }
  149. if procSync.Type != procRun {
  150. return fmt.Errorf("invalid synchronisation flag from parent")
  151. }
  152. }
  153. return nil
  154. }
  155. // syncParentHooks sends to the given pipe a JSON payload which indicates that
  156. // the parent should execute pre-start hooks. It then waits for the parent to
  157. // indicate that it is cleared to resume.
  158. func syncParentHooks(pipe io.ReadWriter) error {
  159. // Tell parent.
  160. if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
  161. return err
  162. }
  163. // Wait for parent to give the all-clear.
  164. var procSync syncT
  165. if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
  166. if err == io.EOF {
  167. return fmt.Errorf("parent closed synchronisation channel")
  168. }
  169. if procSync.Type != procResume {
  170. return fmt.Errorf("invalid synchronisation flag from parent")
  171. }
  172. }
  173. return nil
  174. }
  175. // setupUser changes the groups, gid, and uid for the user inside the container
  176. func setupUser(config *initConfig) error {
  177. // Set up defaults.
  178. defaultExecUser := user.ExecUser{
  179. Uid: syscall.Getuid(),
  180. Gid: syscall.Getgid(),
  181. Home: "/",
  182. }
  183. passwdPath, err := user.GetPasswdPath()
  184. if err != nil {
  185. return err
  186. }
  187. groupPath, err := user.GetGroupPath()
  188. if err != nil {
  189. return err
  190. }
  191. execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
  192. if err != nil {
  193. return err
  194. }
  195. var addGroups []int
  196. if len(config.Config.AdditionalGroups) > 0 {
  197. addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
  198. if err != nil {
  199. return err
  200. }
  201. }
  202. // before we change to the container's user make sure that the processes STDIO
  203. // is correctly owned by the user that we are switching to.
  204. if err := fixStdioPermissions(execUser); err != nil {
  205. return err
  206. }
  207. suppGroups := append(execUser.Sgids, addGroups...)
  208. if err := syscall.Setgroups(suppGroups); err != nil {
  209. return err
  210. }
  211. if err := system.Setgid(execUser.Gid); err != nil {
  212. return err
  213. }
  214. if err := system.Setuid(execUser.Uid); err != nil {
  215. return err
  216. }
  217. // if we didn't get HOME already, set it based on the user's HOME
  218. if envHome := os.Getenv("HOME"); envHome == "" {
  219. if err := os.Setenv("HOME", execUser.Home); err != nil {
  220. return err
  221. }
  222. }
  223. return nil
  224. }
  225. // fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
  226. // The ownership needs to match because it is created outside of the container and needs to be
  227. // localized.
  228. func fixStdioPermissions(u *user.ExecUser) error {
  229. var null syscall.Stat_t
  230. if err := syscall.Stat("/dev/null", &null); err != nil {
  231. return err
  232. }
  233. for _, fd := range []uintptr{
  234. os.Stdin.Fd(),
  235. os.Stderr.Fd(),
  236. os.Stdout.Fd(),
  237. } {
  238. var s syscall.Stat_t
  239. if err := syscall.Fstat(int(fd), &s); err != nil {
  240. return err
  241. }
  242. // skip chown of /dev/null if it was used as one of the STDIO fds.
  243. if s.Rdev == null.Rdev {
  244. continue
  245. }
  246. if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
  247. return err
  248. }
  249. }
  250. return nil
  251. }
  252. // setupNetwork sets up and initializes any network interface inside the container.
  253. func setupNetwork(config *initConfig) error {
  254. for _, config := range config.Networks {
  255. strategy, err := getStrategy(config.Type)
  256. if err != nil {
  257. return err
  258. }
  259. if err := strategy.initialize(config); err != nil {
  260. return err
  261. }
  262. }
  263. return nil
  264. }
  265. func setupRoute(config *configs.Config) error {
  266. for _, config := range config.Routes {
  267. _, dst, err := net.ParseCIDR(config.Destination)
  268. if err != nil {
  269. return err
  270. }
  271. src := net.ParseIP(config.Source)
  272. if src == nil {
  273. return fmt.Errorf("Invalid source for route: %s", config.Source)
  274. }
  275. gw := net.ParseIP(config.Gateway)
  276. if gw == nil {
  277. return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
  278. }
  279. l, err := netlink.LinkByName(config.InterfaceName)
  280. if err != nil {
  281. return err
  282. }
  283. route := &netlink.Route{
  284. Scope: netlink.SCOPE_UNIVERSE,
  285. Dst: dst,
  286. Src: src,
  287. Gw: gw,
  288. LinkIndex: l.Attrs().Index,
  289. }
  290. if err := netlink.RouteAdd(route); err != nil {
  291. return err
  292. }
  293. }
  294. return nil
  295. }
  296. func setupRlimits(limits []configs.Rlimit, pid int) error {
  297. for _, rlimit := range limits {
  298. if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
  299. return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
  300. }
  301. }
  302. return nil
  303. }
  // setOomScoreAdj writes oomScoreAdj, formatted as a decimal string, to
  // /proc/<pid>/oom_score_adj and returns any error from the write.
  func setOomScoreAdj(oomScoreAdj int, pid int) error {
  	target := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
  	value := []byte(strconv.Itoa(oomScoreAdj))
  	return ioutil.WriteFile(target, value, 0600)
  }
  308. // killCgroupProcesses freezes then iterates over all the processes inside the
  309. // manager's cgroups sending a SIGKILL to each process then waiting for them to
  310. // exit.
  311. func killCgroupProcesses(m cgroups.Manager) error {
  312. var procs []*os.Process
  313. if err := m.Freeze(configs.Frozen); err != nil {
  314. logrus.Warn(err)
  315. }
  316. pids, err := m.GetAllPids()
  317. if err != nil {
  318. m.Freeze(configs.Thawed)
  319. return err
  320. }
  321. for _, pid := range pids {
  322. p, err := os.FindProcess(pid)
  323. if err != nil {
  324. logrus.Warn(err)
  325. continue
  326. }
  327. procs = append(procs, p)
  328. if err := p.Kill(); err != nil {
  329. logrus.Warn(err)
  330. }
  331. }
  332. if err := m.Freeze(configs.Thawed); err != nil {
  333. logrus.Warn(err)
  334. }
  335. for _, p := range procs {
  336. if _, err := p.Wait(); err != nil {
  337. logrus.Warn(err)
  338. }
  339. }
  340. return nil
  341. }