qdisc_linux.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. package netlink
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "strconv"
  6. "strings"
  7. "syscall"
  8. "github.com/vishvananda/netlink/nl"
  9. )
  10. // QdiscDel will delete a qdisc from the system.
  11. // Equivalent to: `tc qdisc del $qdisc`
  12. func QdiscDel(qdisc Qdisc) error {
  13. return qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
  14. }
  15. // QdiscChange will change a qdisc in place
  16. // Equivalent to: `tc qdisc change $qdisc`
  17. // The parent and handle MUST NOT be changed.
  18. func QdiscChange(qdisc Qdisc) error {
  19. return qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
  20. }
  21. // QdiscReplace will replace a qdisc to the system.
  22. // Equivalent to: `tc qdisc replace $qdisc`
  23. // The handle MUST change.
  24. func QdiscReplace(qdisc Qdisc) error {
  25. return qdiscModify(
  26. syscall.RTM_NEWQDISC,
  27. syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
  28. qdisc)
  29. }
  30. // QdiscAdd will add a qdisc to the system.
  31. // Equivalent to: `tc qdisc add $qdisc`
  32. func QdiscAdd(qdisc Qdisc) error {
  33. return qdiscModify(
  34. syscall.RTM_NEWQDISC,
  35. syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
  36. qdisc)
  37. }
  38. func qdiscModify(cmd, flags int, qdisc Qdisc) error {
  39. req := nl.NewNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
  40. base := qdisc.Attrs()
  41. msg := &nl.TcMsg{
  42. Family: nl.FAMILY_ALL,
  43. Ifindex: int32(base.LinkIndex),
  44. Handle: base.Handle,
  45. Parent: base.Parent,
  46. }
  47. req.AddData(msg)
  48. // When deleting don't bother building the rest of the netlink payload
  49. if cmd != syscall.RTM_DELQDISC {
  50. if err := qdiscPayload(req, qdisc); err != nil {
  51. return err
  52. }
  53. }
  54. _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
  55. return err
  56. }
  57. func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
  58. req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
  59. options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
  60. if prio, ok := qdisc.(*Prio); ok {
  61. tcmap := nl.TcPrioMap{
  62. Bands: int32(prio.Bands),
  63. Priomap: prio.PriorityMap,
  64. }
  65. options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
  66. } else if tbf, ok := qdisc.(*Tbf); ok {
  67. opt := nl.TcTbfQopt{}
  68. // TODO: handle rate > uint32
  69. opt.Rate.Rate = uint32(tbf.Rate)
  70. opt.Limit = tbf.Limit
  71. opt.Buffer = tbf.Buffer
  72. nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
  73. } else if htb, ok := qdisc.(*Htb); ok {
  74. opt := nl.TcHtbGlob{}
  75. opt.Version = htb.Version
  76. opt.Rate2Quantum = htb.Rate2Quantum
  77. opt.Defcls = htb.Defcls
  78. // TODO: Handle Debug properly. For now default to 0
  79. opt.Debug = htb.Debug
  80. opt.DirectPkts = htb.DirectPkts
  81. nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
  82. // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
  83. } else if netem, ok := qdisc.(*Netem); ok {
  84. opt := nl.TcNetemQopt{}
  85. opt.Latency = netem.Latency
  86. opt.Limit = netem.Limit
  87. opt.Loss = netem.Loss
  88. opt.Gap = netem.Gap
  89. opt.Duplicate = netem.Duplicate
  90. opt.Jitter = netem.Jitter
  91. options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
  92. // Correlation
  93. corr := nl.TcNetemCorr{}
  94. corr.DelayCorr = netem.DelayCorr
  95. corr.LossCorr = netem.LossCorr
  96. corr.DupCorr = netem.DuplicateCorr
  97. if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
  98. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
  99. }
  100. // Corruption
  101. corruption := nl.TcNetemCorrupt{}
  102. corruption.Probability = netem.CorruptProb
  103. corruption.Correlation = netem.CorruptCorr
  104. if corruption.Probability > 0 {
  105. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
  106. }
  107. // Reorder
  108. reorder := nl.TcNetemReorder{}
  109. reorder.Probability = netem.ReorderProb
  110. reorder.Correlation = netem.ReorderCorr
  111. if reorder.Probability > 0 {
  112. nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
  113. }
  114. } else if _, ok := qdisc.(*Ingress); ok {
  115. // ingress filters must use the proper handle
  116. if qdisc.Attrs().Parent != HANDLE_INGRESS {
  117. return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
  118. }
  119. }
  120. req.AddData(options)
  121. return nil
  122. }
  123. // QdiscList gets a list of qdiscs in the system.
  124. // Equivalent to: `tc qdisc show`.
  125. // The list can be filtered by link.
  126. func QdiscList(link Link) ([]Qdisc, error) {
  127. req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
  128. index := int32(0)
  129. if link != nil {
  130. base := link.Attrs()
  131. ensureIndex(base)
  132. index = int32(base.Index)
  133. }
  134. msg := &nl.TcMsg{
  135. Family: nl.FAMILY_ALL,
  136. Ifindex: index,
  137. }
  138. req.AddData(msg)
  139. msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
  140. if err != nil {
  141. return nil, err
  142. }
  143. var res []Qdisc
  144. for _, m := range msgs {
  145. msg := nl.DeserializeTcMsg(m)
  146. attrs, err := nl.ParseRouteAttr(m[msg.Len():])
  147. if err != nil {
  148. return nil, err
  149. }
  150. // skip qdiscs from other interfaces
  151. if link != nil && msg.Ifindex != index {
  152. continue
  153. }
  154. base := QdiscAttrs{
  155. LinkIndex: int(msg.Ifindex),
  156. Handle: msg.Handle,
  157. Parent: msg.Parent,
  158. Refcnt: msg.Info,
  159. }
  160. var qdisc Qdisc
  161. qdiscType := ""
  162. for _, attr := range attrs {
  163. switch attr.Attr.Type {
  164. case nl.TCA_KIND:
  165. qdiscType = string(attr.Value[:len(attr.Value)-1])
  166. switch qdiscType {
  167. case "pfifo_fast":
  168. qdisc = &PfifoFast{}
  169. case "prio":
  170. qdisc = &Prio{}
  171. case "tbf":
  172. qdisc = &Tbf{}
  173. case "ingress":
  174. qdisc = &Ingress{}
  175. case "htb":
  176. qdisc = &Htb{}
  177. case "netem":
  178. qdisc = &Netem{}
  179. default:
  180. qdisc = &GenericQdisc{QdiscType: qdiscType}
  181. }
  182. case nl.TCA_OPTIONS:
  183. switch qdiscType {
  184. case "pfifo_fast":
  185. // pfifo returns TcPrioMap directly without wrapping it in rtattr
  186. if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
  187. return nil, err
  188. }
  189. case "prio":
  190. // prio returns TcPrioMap directly without wrapping it in rtattr
  191. if err := parsePrioData(qdisc, attr.Value); err != nil {
  192. return nil, err
  193. }
  194. case "tbf":
  195. data, err := nl.ParseRouteAttr(attr.Value)
  196. if err != nil {
  197. return nil, err
  198. }
  199. if err := parseTbfData(qdisc, data); err != nil {
  200. return nil, err
  201. }
  202. case "htb":
  203. data, err := nl.ParseRouteAttr(attr.Value)
  204. if err != nil {
  205. return nil, err
  206. }
  207. if err := parseHtbData(qdisc, data); err != nil {
  208. return nil, err
  209. }
  210. case "netem":
  211. if err := parseNetemData(qdisc, attr.Value); err != nil {
  212. return nil, err
  213. }
  214. // no options for ingress
  215. }
  216. }
  217. }
  218. *qdisc.Attrs() = base
  219. res = append(res, qdisc)
  220. }
  221. return res, nil
  222. }
  223. func parsePfifoFastData(qdisc Qdisc, value []byte) error {
  224. pfifo := qdisc.(*PfifoFast)
  225. tcmap := nl.DeserializeTcPrioMap(value)
  226. pfifo.PriorityMap = tcmap.Priomap
  227. pfifo.Bands = uint8(tcmap.Bands)
  228. return nil
  229. }
  230. func parsePrioData(qdisc Qdisc, value []byte) error {
  231. prio := qdisc.(*Prio)
  232. tcmap := nl.DeserializeTcPrioMap(value)
  233. prio.PriorityMap = tcmap.Priomap
  234. prio.Bands = uint8(tcmap.Bands)
  235. return nil
  236. }
  237. func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  238. native = nl.NativeEndian()
  239. htb := qdisc.(*Htb)
  240. for _, datum := range data {
  241. switch datum.Attr.Type {
  242. case nl.TCA_HTB_INIT:
  243. opt := nl.DeserializeTcHtbGlob(datum.Value)
  244. htb.Version = opt.Version
  245. htb.Rate2Quantum = opt.Rate2Quantum
  246. htb.Defcls = opt.Defcls
  247. htb.Debug = opt.Debug
  248. htb.DirectPkts = opt.DirectPkts
  249. case nl.TCA_HTB_DIRECT_QLEN:
  250. // TODO
  251. //htb.DirectQlen = native.uint32(datum.Value)
  252. }
  253. }
  254. return nil
  255. }
  256. func parseNetemData(qdisc Qdisc, value []byte) error {
  257. netem := qdisc.(*Netem)
  258. opt := nl.DeserializeTcNetemQopt(value)
  259. netem.Latency = opt.Latency
  260. netem.Limit = opt.Limit
  261. netem.Loss = opt.Loss
  262. netem.Gap = opt.Gap
  263. netem.Duplicate = opt.Duplicate
  264. netem.Jitter = opt.Jitter
  265. data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:])
  266. if err != nil {
  267. return err
  268. }
  269. for _, datum := range data {
  270. switch datum.Attr.Type {
  271. case nl.TCA_NETEM_CORR:
  272. opt := nl.DeserializeTcNetemCorr(datum.Value)
  273. netem.DelayCorr = opt.DelayCorr
  274. netem.LossCorr = opt.LossCorr
  275. netem.DuplicateCorr = opt.DupCorr
  276. case nl.TCA_NETEM_CORRUPT:
  277. opt := nl.DeserializeTcNetemCorrupt(datum.Value)
  278. netem.CorruptProb = opt.Probability
  279. netem.CorruptCorr = opt.Correlation
  280. case nl.TCA_NETEM_REORDER:
  281. opt := nl.DeserializeTcNetemReorder(datum.Value)
  282. netem.ReorderProb = opt.Probability
  283. netem.ReorderCorr = opt.Correlation
  284. }
  285. }
  286. return nil
  287. }
  288. func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  289. native = nl.NativeEndian()
  290. tbf := qdisc.(*Tbf)
  291. for _, datum := range data {
  292. switch datum.Attr.Type {
  293. case nl.TCA_TBF_PARMS:
  294. opt := nl.DeserializeTcTbfQopt(datum.Value)
  295. tbf.Rate = uint64(opt.Rate.Rate)
  296. tbf.Limit = opt.Limit
  297. tbf.Buffer = opt.Buffer
  298. case nl.TCA_TBF_RATE64:
  299. tbf.Rate = native.Uint64(datum.Value[0:4])
  300. }
  301. }
  302. return nil
  303. }
  304. const (
  305. TIME_UNITS_PER_SEC = 1000000
  306. )
  307. var (
  308. tickInUsec float64 = 0.0
  309. clockFactor float64 = 0.0
  310. hz float64 = 0.0
  311. )
  312. func initClock() {
  313. data, err := ioutil.ReadFile("/proc/net/psched")
  314. if err != nil {
  315. return
  316. }
  317. parts := strings.Split(strings.TrimSpace(string(data)), " ")
  318. if len(parts) < 3 {
  319. return
  320. }
  321. var vals [3]uint64
  322. for i := range vals {
  323. val, err := strconv.ParseUint(parts[i], 16, 32)
  324. if err != nil {
  325. return
  326. }
  327. vals[i] = val
  328. }
  329. // compatibility
  330. if vals[2] == 1000000000 {
  331. vals[0] = vals[1]
  332. }
  333. clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
  334. tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
  335. hz = float64(vals[0])
  336. }
  337. func TickInUsec() float64 {
  338. if tickInUsec == 0.0 {
  339. initClock()
  340. }
  341. return tickInUsec
  342. }
  343. func ClockFactor() float64 {
  344. if clockFactor == 0.0 {
  345. initClock()
  346. }
  347. return clockFactor
  348. }
  349. func Hz() float64 {
  350. if hz == 0.0 {
  351. initClock()
  352. }
  353. return hz
  354. }
  355. func time2Tick(time uint32) uint32 {
  356. return uint32(float64(time) * TickInUsec())
  357. }
  358. func tick2Time(tick uint32) uint32 {
  359. return uint32(float64(tick) / TickInUsec())
  360. }
  361. func time2Ktime(time uint32) uint32 {
  362. return uint32(float64(time) * ClockFactor())
  363. }
  364. func ktime2Time(ktime uint32) uint32 {
  365. return uint32(float64(ktime) / ClockFactor())
  366. }
  367. func burst(rate uint64, buffer uint32) uint32 {
  368. return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
  369. }
  370. func latency(rate uint64, limit, buffer uint32) float64 {
  371. return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
  372. }
  373. func Xmittime(rate uint64, size uint32) float64 {
  374. return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
  375. }