standard_init_linux.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. // +build linux
  2. package libcontainer
  3. import (
  4. "fmt"
  5. "io"
  6. "os"
  7. "syscall"
  8. "github.com/opencontainers/runc/libcontainer/apparmor"
  9. "github.com/opencontainers/runc/libcontainer/configs"
  10. "github.com/opencontainers/runc/libcontainer/keys"
  11. "github.com/opencontainers/runc/libcontainer/label"
  12. "github.com/opencontainers/runc/libcontainer/seccomp"
  13. "github.com/opencontainers/runc/libcontainer/system"
  14. )
  15. type linuxStandardInit struct {
  16. pipe io.ReadWriter
  17. parentPid int
  18. config *initConfig
  19. }
  20. func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
  21. var newperms uint32
  22. if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
  23. // with user ns we need 'other' search permissions
  24. newperms = 0x8
  25. } else {
  26. // without user ns we need 'UID' search permissions
  27. newperms = 0x80000
  28. }
  29. // create a unique per session container name that we can
  30. // join in setns; however, other containers can also join it
  31. return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
  32. }
  // PR_SET_NO_NEW_PRIVS is the prctl option number for "no new privileges".
  // It isn't exposed by the Go standard library, so it is defined here with the
  // value copied from the kernel headers (38, i.e. 0x26).
  const PR_SET_NO_NEW_PRIVS = 38
  36. func (l *linuxStandardInit) Init() error {
  37. ringname, keepperms, newperms := l.getSessionRingParams()
  38. // do not inherit the parent's session keyring
  39. sessKeyId, err := keyctl.JoinSessionKeyring(ringname)
  40. if err != nil {
  41. return err
  42. }
  43. // make session keyring searcheable
  44. if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
  45. return err
  46. }
  47. var console *linuxConsole
  48. if l.config.Console != "" {
  49. console = newConsoleFromPath(l.config.Console)
  50. if err := console.dupStdio(); err != nil {
  51. return err
  52. }
  53. }
  54. if console != nil {
  55. if err := system.Setctty(); err != nil {
  56. return err
  57. }
  58. }
  59. if err := setupNetwork(l.config); err != nil {
  60. return err
  61. }
  62. if err := setupRoute(l.config.Config); err != nil {
  63. return err
  64. }
  65. label.Init()
  66. // InitializeMountNamespace() can be executed only for a new mount namespace
  67. if l.config.Config.Namespaces.Contains(configs.NEWNS) {
  68. if err := setupRootfs(l.config.Config, console, l.pipe); err != nil {
  69. return err
  70. }
  71. }
  72. if hostname := l.config.Config.Hostname; hostname != "" {
  73. if err := syscall.Sethostname([]byte(hostname)); err != nil {
  74. return err
  75. }
  76. }
  77. if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
  78. return err
  79. }
  80. if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
  81. return err
  82. }
  83. for key, value := range l.config.Config.Sysctl {
  84. if err := writeSystemProperty(key, value); err != nil {
  85. return err
  86. }
  87. }
  88. for _, path := range l.config.Config.ReadonlyPaths {
  89. if err := remountReadonly(path); err != nil {
  90. return err
  91. }
  92. }
  93. for _, path := range l.config.Config.MaskPaths {
  94. if err := maskFile(path); err != nil {
  95. return err
  96. }
  97. }
  98. pdeath, err := system.GetParentDeathSignal()
  99. if err != nil {
  100. return err
  101. }
  102. if l.config.NoNewPrivileges {
  103. if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
  104. return err
  105. }
  106. }
  107. // Tell our parent that we're ready to Execv. This must be done before the
  108. // Seccomp rules have been applied, because we need to be able to read and
  109. // write to a socket.
  110. if err := syncParentReady(l.pipe); err != nil {
  111. return err
  112. }
  113. if l.config.Config.Seccomp != nil {
  114. if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
  115. return err
  116. }
  117. }
  118. if err := finalizeNamespace(l.config); err != nil {
  119. return err
  120. }
  121. // finalizeNamespace can change user/group which clears the parent death
  122. // signal, so we restore it here.
  123. if err := pdeath.Restore(); err != nil {
  124. return err
  125. }
  126. // compare the parent from the inital start of the init process and make sure that it did not change.
  127. // if the parent changes that means it died and we were reparened to something else so we should
  128. // just kill ourself and not cause problems for someone else.
  129. if syscall.Getppid() != l.parentPid {
  130. return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
  131. }
  132. return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
  133. }