qdisc_linux.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. package netlink
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "strconv"
  6. "strings"
  7. "syscall"
  8. "github.com/vishvananda/netlink/nl"
  9. )
  10. // NOTE function is here because it uses other linux functions
  11. func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
  12. var limit uint32 = 1000
  13. var lossCorr, delayCorr, duplicateCorr uint32
  14. var reorderProb, reorderCorr uint32
  15. var corruptProb, corruptCorr uint32
  16. latency := nattrs.Latency
  17. loss := Percentage2u32(nattrs.Loss)
  18. gap := nattrs.Gap
  19. duplicate := Percentage2u32(nattrs.Duplicate)
  20. jitter := nattrs.Jitter
  21. // Correlation
  22. if latency > 0 && jitter > 0 {
  23. delayCorr = Percentage2u32(nattrs.DelayCorr)
  24. }
  25. if loss > 0 {
  26. lossCorr = Percentage2u32(nattrs.LossCorr)
  27. }
  28. if duplicate > 0 {
  29. duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
  30. }
  31. // FIXME should validate values(like loss/duplicate are percentages...)
  32. latency = time2Tick(latency)
  33. if nattrs.Limit != 0 {
  34. limit = nattrs.Limit
  35. }
  36. // Jitter is only value if latency is > 0
  37. if latency > 0 {
  38. jitter = time2Tick(jitter)
  39. }
  40. reorderProb = Percentage2u32(nattrs.ReorderProb)
  41. reorderCorr = Percentage2u32(nattrs.ReorderCorr)
  42. if reorderProb > 0 {
  43. // ERROR if lantency == 0
  44. if gap == 0 {
  45. gap = 1
  46. }
  47. }
  48. corruptProb = Percentage2u32(nattrs.CorruptProb)
  49. corruptCorr = Percentage2u32(nattrs.CorruptCorr)
  50. return &Netem{
  51. QdiscAttrs: attrs,
  52. Latency: latency,
  53. DelayCorr: delayCorr,
  54. Limit: limit,
  55. Loss: loss,
  56. LossCorr: lossCorr,
  57. Gap: gap,
  58. Duplicate: duplicate,
  59. DuplicateCorr: duplicateCorr,
  60. Jitter: jitter,
  61. ReorderProb: reorderProb,
  62. ReorderCorr: reorderCorr,
  63. CorruptProb: corruptProb,
  64. CorruptCorr: corruptCorr,
  65. }
  66. }
  67. // QdiscDel will delete a qdisc from the system.
  68. // Equivalent to: `tc qdisc del $qdisc`
  69. func QdiscDel(qdisc Qdisc) error {
  70. return pkgHandle.QdiscDel(qdisc)
  71. }
  72. // QdiscDel will delete a qdisc from the system.
  73. // Equivalent to: `tc qdisc del $qdisc`
  74. func (h *Handle) QdiscDel(qdisc Qdisc) error {
  75. return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
  76. }
  77. // QdiscChange will change a qdisc in place
  78. // Equivalent to: `tc qdisc change $qdisc`
  79. // The parent and handle MUST NOT be changed.
  80. func QdiscChange(qdisc Qdisc) error {
  81. return pkgHandle.QdiscChange(qdisc)
  82. }
  83. // QdiscChange will change a qdisc in place
  84. // Equivalent to: `tc qdisc change $qdisc`
  85. // The parent and handle MUST NOT be changed.
  86. func (h *Handle) QdiscChange(qdisc Qdisc) error {
  87. return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
  88. }
  89. // QdiscReplace will replace a qdisc to the system.
  90. // Equivalent to: `tc qdisc replace $qdisc`
  91. // The handle MUST change.
  92. func QdiscReplace(qdisc Qdisc) error {
  93. return pkgHandle.QdiscReplace(qdisc)
  94. }
  95. // QdiscReplace will replace a qdisc to the system.
  96. // Equivalent to: `tc qdisc replace $qdisc`
  97. // The handle MUST change.
  98. func (h *Handle) QdiscReplace(qdisc Qdisc) error {
  99. return h.qdiscModify(
  100. syscall.RTM_NEWQDISC,
  101. syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
  102. qdisc)
  103. }
  104. // QdiscAdd will add a qdisc to the system.
  105. // Equivalent to: `tc qdisc add $qdisc`
  106. func QdiscAdd(qdisc Qdisc) error {
  107. return pkgHandle.QdiscAdd(qdisc)
  108. }
  109. // QdiscAdd will add a qdisc to the system.
  110. // Equivalent to: `tc qdisc add $qdisc`
  111. func (h *Handle) QdiscAdd(qdisc Qdisc) error {
  112. return h.qdiscModify(
  113. syscall.RTM_NEWQDISC,
  114. syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
  115. qdisc)
  116. }
  117. func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
  118. req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
  119. base := qdisc.Attrs()
  120. msg := &nl.TcMsg{
  121. Family: nl.FAMILY_ALL,
  122. Ifindex: int32(base.LinkIndex),
  123. Handle: base.Handle,
  124. Parent: base.Parent,
  125. }
  126. req.AddData(msg)
  127. // When deleting don't bother building the rest of the netlink payload
  128. if cmd != syscall.RTM_DELQDISC {
  129. if err := qdiscPayload(req, qdisc); err != nil {
  130. return err
  131. }
  132. }
  133. _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
  134. return err
  135. }
  136. func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
  137. req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
  138. options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
  139. if prio, ok := qdisc.(*Prio); ok {
  140. tcmap := nl.TcPrioMap{
  141. Bands: int32(prio.Bands),
  142. Priomap: prio.PriorityMap,
  143. }
  144. options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
  145. } else if tbf, ok := qdisc.(*Tbf); ok {
  146. opt := nl.TcTbfQopt{}
  147. opt.Rate.Rate = uint32(tbf.Rate)
  148. opt.Peakrate.Rate = uint32(tbf.Peakrate)
  149. opt.Limit = tbf.Limit
  150. opt.Buffer = tbf.Buffer
  151. nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
  152. if tbf.Rate >= uint64(1<<32) {
  153. nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate))
  154. }
  155. if tbf.Peakrate >= uint64(1<<32) {
  156. nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate))
  157. }
  158. if tbf.Peakrate > 0 {
  159. nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst))
  160. }
  161. } else if htb, ok := qdisc.(*Htb); ok {
  162. opt := nl.TcHtbGlob{}
  163. opt.Version = htb.Version
  164. opt.Rate2Quantum = htb.Rate2Quantum
  165. opt.Defcls = htb.Defcls
  166. // TODO: Handle Debug properly. For now default to 0
  167. opt.Debug = htb.Debug
  168. opt.DirectPkts = htb.DirectPkts
  169. nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
  170. // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
  171. } else if netem, ok := qdisc.(*Netem); ok {
  172. opt := nl.TcNetemQopt{}
  173. opt.Latency = netem.Latency
  174. opt.Limit = netem.Limit
  175. opt.Loss = netem.Loss
  176. opt.Gap = netem.Gap
  177. opt.Duplicate = netem.Duplicate
  178. opt.Jitter = netem.Jitter
  179. options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
  180. // Correlation
  181. corr := nl.TcNetemCorr{}
  182. corr.DelayCorr = netem.DelayCorr
  183. corr.LossCorr = netem.LossCorr
  184. corr.DupCorr = netem.DuplicateCorr
  185. if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
  186. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
  187. }
  188. // Corruption
  189. corruption := nl.TcNetemCorrupt{}
  190. corruption.Probability = netem.CorruptProb
  191. corruption.Correlation = netem.CorruptCorr
  192. if corruption.Probability > 0 {
  193. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
  194. }
  195. // Reorder
  196. reorder := nl.TcNetemReorder{}
  197. reorder.Probability = netem.ReorderProb
  198. reorder.Correlation = netem.ReorderCorr
  199. if reorder.Probability > 0 {
  200. nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
  201. }
  202. } else if _, ok := qdisc.(*Ingress); ok {
  203. // ingress filters must use the proper handle
  204. if qdisc.Attrs().Parent != HANDLE_INGRESS {
  205. return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
  206. }
  207. }
  208. req.AddData(options)
  209. return nil
  210. }
  211. // QdiscList gets a list of qdiscs in the system.
  212. // Equivalent to: `tc qdisc show`.
  213. // The list can be filtered by link.
  214. func QdiscList(link Link) ([]Qdisc, error) {
  215. return pkgHandle.QdiscList(link)
  216. }
  217. // QdiscList gets a list of qdiscs in the system.
  218. // Equivalent to: `tc qdisc show`.
  219. // The list can be filtered by link.
  220. func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
  221. req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
  222. index := int32(0)
  223. if link != nil {
  224. base := link.Attrs()
  225. h.ensureIndex(base)
  226. index = int32(base.Index)
  227. }
  228. msg := &nl.TcMsg{
  229. Family: nl.FAMILY_ALL,
  230. Ifindex: index,
  231. }
  232. req.AddData(msg)
  233. msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
  234. if err != nil {
  235. return nil, err
  236. }
  237. var res []Qdisc
  238. for _, m := range msgs {
  239. msg := nl.DeserializeTcMsg(m)
  240. attrs, err := nl.ParseRouteAttr(m[msg.Len():])
  241. if err != nil {
  242. return nil, err
  243. }
  244. // skip qdiscs from other interfaces
  245. if link != nil && msg.Ifindex != index {
  246. continue
  247. }
  248. base := QdiscAttrs{
  249. LinkIndex: int(msg.Ifindex),
  250. Handle: msg.Handle,
  251. Parent: msg.Parent,
  252. Refcnt: msg.Info,
  253. }
  254. var qdisc Qdisc
  255. qdiscType := ""
  256. for _, attr := range attrs {
  257. switch attr.Attr.Type {
  258. case nl.TCA_KIND:
  259. qdiscType = string(attr.Value[:len(attr.Value)-1])
  260. switch qdiscType {
  261. case "pfifo_fast":
  262. qdisc = &PfifoFast{}
  263. case "prio":
  264. qdisc = &Prio{}
  265. case "tbf":
  266. qdisc = &Tbf{}
  267. case "ingress":
  268. qdisc = &Ingress{}
  269. case "htb":
  270. qdisc = &Htb{}
  271. case "netem":
  272. qdisc = &Netem{}
  273. default:
  274. qdisc = &GenericQdisc{QdiscType: qdiscType}
  275. }
  276. case nl.TCA_OPTIONS:
  277. switch qdiscType {
  278. case "pfifo_fast":
  279. // pfifo returns TcPrioMap directly without wrapping it in rtattr
  280. if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
  281. return nil, err
  282. }
  283. case "prio":
  284. // prio returns TcPrioMap directly without wrapping it in rtattr
  285. if err := parsePrioData(qdisc, attr.Value); err != nil {
  286. return nil, err
  287. }
  288. case "tbf":
  289. data, err := nl.ParseRouteAttr(attr.Value)
  290. if err != nil {
  291. return nil, err
  292. }
  293. if err := parseTbfData(qdisc, data); err != nil {
  294. return nil, err
  295. }
  296. case "htb":
  297. data, err := nl.ParseRouteAttr(attr.Value)
  298. if err != nil {
  299. return nil, err
  300. }
  301. if err := parseHtbData(qdisc, data); err != nil {
  302. return nil, err
  303. }
  304. case "netem":
  305. if err := parseNetemData(qdisc, attr.Value); err != nil {
  306. return nil, err
  307. }
  308. // no options for ingress
  309. }
  310. }
  311. }
  312. *qdisc.Attrs() = base
  313. res = append(res, qdisc)
  314. }
  315. return res, nil
  316. }
  317. func parsePfifoFastData(qdisc Qdisc, value []byte) error {
  318. pfifo := qdisc.(*PfifoFast)
  319. tcmap := nl.DeserializeTcPrioMap(value)
  320. pfifo.PriorityMap = tcmap.Priomap
  321. pfifo.Bands = uint8(tcmap.Bands)
  322. return nil
  323. }
  324. func parsePrioData(qdisc Qdisc, value []byte) error {
  325. prio := qdisc.(*Prio)
  326. tcmap := nl.DeserializeTcPrioMap(value)
  327. prio.PriorityMap = tcmap.Priomap
  328. prio.Bands = uint8(tcmap.Bands)
  329. return nil
  330. }
  331. func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  332. native = nl.NativeEndian()
  333. htb := qdisc.(*Htb)
  334. for _, datum := range data {
  335. switch datum.Attr.Type {
  336. case nl.TCA_HTB_INIT:
  337. opt := nl.DeserializeTcHtbGlob(datum.Value)
  338. htb.Version = opt.Version
  339. htb.Rate2Quantum = opt.Rate2Quantum
  340. htb.Defcls = opt.Defcls
  341. htb.Debug = opt.Debug
  342. htb.DirectPkts = opt.DirectPkts
  343. case nl.TCA_HTB_DIRECT_QLEN:
  344. // TODO
  345. //htb.DirectQlen = native.uint32(datum.Value)
  346. }
  347. }
  348. return nil
  349. }
  350. func parseNetemData(qdisc Qdisc, value []byte) error {
  351. netem := qdisc.(*Netem)
  352. opt := nl.DeserializeTcNetemQopt(value)
  353. netem.Latency = opt.Latency
  354. netem.Limit = opt.Limit
  355. netem.Loss = opt.Loss
  356. netem.Gap = opt.Gap
  357. netem.Duplicate = opt.Duplicate
  358. netem.Jitter = opt.Jitter
  359. data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:])
  360. if err != nil {
  361. return err
  362. }
  363. for _, datum := range data {
  364. switch datum.Attr.Type {
  365. case nl.TCA_NETEM_CORR:
  366. opt := nl.DeserializeTcNetemCorr(datum.Value)
  367. netem.DelayCorr = opt.DelayCorr
  368. netem.LossCorr = opt.LossCorr
  369. netem.DuplicateCorr = opt.DupCorr
  370. case nl.TCA_NETEM_CORRUPT:
  371. opt := nl.DeserializeTcNetemCorrupt(datum.Value)
  372. netem.CorruptProb = opt.Probability
  373. netem.CorruptCorr = opt.Correlation
  374. case nl.TCA_NETEM_REORDER:
  375. opt := nl.DeserializeTcNetemReorder(datum.Value)
  376. netem.ReorderProb = opt.Probability
  377. netem.ReorderCorr = opt.Correlation
  378. }
  379. }
  380. return nil
  381. }
  382. func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  383. native = nl.NativeEndian()
  384. tbf := qdisc.(*Tbf)
  385. for _, datum := range data {
  386. switch datum.Attr.Type {
  387. case nl.TCA_TBF_PARMS:
  388. opt := nl.DeserializeTcTbfQopt(datum.Value)
  389. tbf.Rate = uint64(opt.Rate.Rate)
  390. tbf.Peakrate = uint64(opt.Peakrate.Rate)
  391. tbf.Limit = opt.Limit
  392. tbf.Buffer = opt.Buffer
  393. case nl.TCA_TBF_RATE64:
  394. tbf.Rate = native.Uint64(datum.Value[0:8])
  395. case nl.TCA_TBF_PRATE64:
  396. tbf.Peakrate = native.Uint64(datum.Value[0:8])
  397. case nl.TCA_TBF_PBURST:
  398. tbf.Minburst = native.Uint32(datum.Value[0:4])
  399. }
  400. }
  401. return nil
  402. }
  403. const (
  404. TIME_UNITS_PER_SEC = 1000000
  405. )
  406. var (
  407. tickInUsec float64
  408. clockFactor float64
  409. hz float64
  410. )
  411. func initClock() {
  412. data, err := ioutil.ReadFile("/proc/net/psched")
  413. if err != nil {
  414. return
  415. }
  416. parts := strings.Split(strings.TrimSpace(string(data)), " ")
  417. if len(parts) < 3 {
  418. return
  419. }
  420. var vals [3]uint64
  421. for i := range vals {
  422. val, err := strconv.ParseUint(parts[i], 16, 32)
  423. if err != nil {
  424. return
  425. }
  426. vals[i] = val
  427. }
  428. // compatibility
  429. if vals[2] == 1000000000 {
  430. vals[0] = vals[1]
  431. }
  432. clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
  433. tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
  434. hz = float64(vals[0])
  435. }
  436. func TickInUsec() float64 {
  437. if tickInUsec == 0.0 {
  438. initClock()
  439. }
  440. return tickInUsec
  441. }
  442. func ClockFactor() float64 {
  443. if clockFactor == 0.0 {
  444. initClock()
  445. }
  446. return clockFactor
  447. }
  448. func Hz() float64 {
  449. if hz == 0.0 {
  450. initClock()
  451. }
  452. return hz
  453. }
  454. func time2Tick(time uint32) uint32 {
  455. return uint32(float64(time) * TickInUsec())
  456. }
  457. func tick2Time(tick uint32) uint32 {
  458. return uint32(float64(tick) / TickInUsec())
  459. }
  460. func time2Ktime(time uint32) uint32 {
  461. return uint32(float64(time) * ClockFactor())
  462. }
  463. func ktime2Time(ktime uint32) uint32 {
  464. return uint32(float64(ktime) / ClockFactor())
  465. }
  466. func burst(rate uint64, buffer uint32) uint32 {
  467. return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
  468. }
  469. func latency(rate uint64, limit, buffer uint32) float64 {
  470. return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
  471. }
  472. func Xmittime(rate uint64, size uint32) float64 {
  473. return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
  474. }