/*
* net/sched/sch_htb.c Hierarchical token bucket, feed tree version
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Martin Devera, <devik@cdi.cz>
*
* Credits (in time order) for older HTB versions:
* Stef Coene <stef.coene@docum.org>
* HTB support at LARTC mailing list
* Ondrej Kraus, <krauso@barr.cz>
* found missing INIT_QDISC(htb)
* Vladimir Smelhaus, Aamer Akhter, Bert Hubert
* helped a lot to locate nasty class stall bug
* Andi Kleen, Jamal Hadi, Bert Hubert
* code review and helpful comments on shaping
* Tomasz Wrona, <tw@eter.tym.pl>
* created test case so that I was able to fix nasty bug
* Wilfried Weissmann
* spotted bug in dequeue code and helped with fix
* Jiri Fojtasek
* fixed requeue routine
* and many others. thanks.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
/* HTB algorithm.
Author: devik@cdi.cz
========================================================================
HTB is like TBF with multiple classes. It is also similar to CBQ because
it allows assigning a priority to each class in the hierarchy.
In fact it is another implementation of Floyd's formal sharing.
Levels:
Each class is assigned a level. A leaf always has level 0 and root
classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
one less than their parent.
*/
static int htb_hysteresis = 0;// __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
/* Module parameter and sysfs export */
module_param (htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
/* used internally to keep status of single class */
/*
When the class mode is HTB_CAN_SEND, the class may send and is not blocked; HTB_CANT_SEND
means the class is blocked and cannot send at all; HTB_MAY_BORROW is also a blocked state,
but the class may borrow bandwidth from other classes in order to send.
*/
enum htb_cmode {
HTB_CANT_SEND, /* class can't send and can't borrow */
HTB_MAY_BORROW, /* class can't send but may borrow */
HTB_CAN_SEND /* class can send */
};
/*
How does a red-black tree with n nodes keep its height at O(log n)? That follows from
the five red-black tree properties:
1. Every node is either red or black.
2. The root is black.
3. Every leaf (the NIL/NULL sentinel at the end of the tree) is black.
4. If a node is red, both of its children are black.
5. For every node, all paths from it down to descendant NIL leaves contain the same number of black nodes.
*/
/*
Usage: ... qdisc add ... htb [default N] [r2q N]
default minor id of class to which unclassified packets are sent {0}
r2q DRR quantums are computed as rate in Bps/r2q {10}
debug string of 16 numbers each 0-3 {0}
... class add ... htb rate R1 [burst B1] [mpu B] [overhead O]
[prio P] [slot S] [pslot PS]
[ceil R2] [cburst B2] [mtu MTU] [quantum Q]
rate rate allocated to this class (class can still borrow)
burst max bytes burst which can be accumulated during idle period {computed}
mpu minimum packet size used in rate computations
overhead per-packet size overhead used in rate computations
linklayer adapting to a linklayer e.g. atm
ceil definite upper class rate (no borrows) {rate}
cburst burst but for ceil {computed}
mtu max packet size we create rate map for {1600}
prio priority of leaf; lower are served first {0}
quantum how much bytes to serve from leaf at once {use r2q}
*/
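/*
 An illustrative setup built from the options above (hypothetical device, rates and
 classids; not taken from this file):
 tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit ceil 100mbit
 tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit ceil 100mbit prio 0
 tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit ceil 100mbit prio 1
 tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip dport 80 0xffff flowid 1:10
 Unclassified traffic falls into 1:20 (the "default 20" minor id); each leaf may borrow
 up to ceil from 1:1 while the other leaf is idle.
*/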
/* interior & leaf nodes; props specific to leaves are marked L: */
//Each "tc class add" creates an htb_class; the class is linked into htb_sched (the HTB qdisc
//private data)->clhash through Qdisc_class_common->hnode, see htb_change_class -> qdisc_class_hash_insert.
//An htb_class pointer is usually named cl, see htb_get. Reference: <HTB介绍以及使用.doc>
struct htb_class {/* created or modified in htb_change_class; a new HTB class gets a default pfifo_qdisc_ops leaf qdisc, and the actual skb enqueue happens on that leaf's queue */
struct Qdisc_class_common common; /* ties the class to the HTB private data, see htb_change_class -> qdisc_class_hash_insert; hangs off the hash in Qdisc_class_hash */
/* general class parameters */
struct gnet_stats_basic_packed bstats; /* byte and packet counters */
struct gnet_stats_queue qstats; /* queue statistics */
struct gnet_stats_rate_est rate_est; /* rate estimator: bytes/s and packets/s */
struct tc_htb_xstats xstats; /* our special stats: lends, borrows, tokens, ... */
int refcnt; /* usage count of this class; set to 1 when the class is created, see htb_change_class */
/* topology */
/* level in the tree: 0 means a leaf, root classes are at TC_HTB_MAXDEPTH-1 (7) */
int level; /* our level (see above) */
unsigned int children;
/* together with common above, this keeps every htb_class (created by "tc class add") linked into the hierarchy */
struct htb_class *parent; /* parent class */ /* e.g. "tc class add parent 5:2 classid 6: ...": parent is the parent class */
/*
quantum: per the HTB documentation, quantum is how many bytes a class may be served
("borrow") in one go when borrowing is possible; it should be kept small but larger
than the MTU, and normally is not set by hand - it is derived from rate/r2q.
(See the full option list in the Usage comment above.)
*/ /* if userspace sets a prio of TC_HTB_NUMPRIO (8) or more it is clamped to 7, see htb_change_class */
int prio; /* these two are used only by leaves... */ /* value is below TC_HTB_NUMPRIO, see htb_activate */
/* quantum is used at dequeue time in htb_dequeue_tree */
int quantum; /* but stored for parent-to-leaf return */ /* DRR quantum, computed from rate/r2q by default and clamped to [1000, 200000]; can also be set explicitly with the quantum parameter */
union {
struct htb_class_leaf { /* used when this class is a leaf; it does the actual queuing */
struct Qdisc *q; /* the default leaf qdisc of a newly created HTB class is pfifo_qdisc_ops */
int deficit[TC_HTB_MAXDEPTH]; /* DRR deficit per level, used at dequeue time, see htb_dequeue_tree; reduced by skb->len for every packet sent */
struct list_head drop_list; /* links the leaf into htb_sched->drops[], see htb_activate */
} leaf;
/* non-leaf classes use the inner structure to form the class tree */
struct htb_class_inner {
/* feed trees: red-black trees of child classes that can supply packets, ordered by classid,
 one tree per priority. feed holds descendants that are "yellow", i.e. they must borrow from
 this class before they can be scheduled; see htb_activate_prios */
struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ /* children are inserted into the parent's feed[prio], see htb_add_to_id_tree */
/* pointer to the node currently being serviced in each priority tree */
struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
/* When class changes from state 1->2 and disconnects from
parent's feed then we lost ptr value and start from the
first child again. Here we store classid of the
last valid ptr (used when ptr is NULL). */
/* classid of the last valid ptr, see htb_deactivate_prios */
u32 last_ptr_id[TC_HTB_NUMPRIO];
} inner;
} un;
/* this class's own node in the self/feed trees */
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
/* event (wait) tree node: when the class exceeds its rate it is hung on the qdisc's wait queue wait_pq */
struct rb_node pq_node; /* node for event queue */
psched_time_t pq_key;
/* active priority mask: a set bit means the corresponding un.inner.feed[prio] tree has packets
 available; for a non-leaf class this is effectively the OR of its children's masks, see htb_activate_prios */
int prio_activity; /* for which prios are we active */ /* set in htb_activate: cl->prio_activity = 1 << cl->prio */
enum htb_cmode cmode; /* current mode of the class */ /* whether the class may send; defaults to HTB_CAN_SEND */
/* class attached filters */ /* e.g. "tc filter add dev eth0 parent 1:3 protocol ip prio 100 ..." attaches a filter to class 1:3 */
/* filters that belong to this class; in htb_find_tcf, if no class is found the filter is attached to the qdisc's root filter list instead */
struct tcf_proto *filter_list; /* every class carries its own filter list; a filter is attached to one specific class node, as in the 1:3 example above */
int filter_cnt; /* filter use count, see htb_bind_filter */
/* token bucket parameters */
struct qdisc_rate_table *rate; /* rate table of the class itself */ /* token rate; linked into qdisc_rtab_list, see qdisc_put_rtab */
struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ /* ceil rate; linked into qdisc_rtab_list, see qdisc_put_rtab */
/* taken from buffer and cbuffer in tc_htb_opt */
long buffer, cbuffer; /* token bucket depth/rate */ /* bucket depths: tokens accumulate up to buffer/cbuffer while the class is idle, so when a packet needs more tokens than are available the class waits until enough have accumulated */
psched_tdiff_t mbuffer; /* max wait time */ /* defaults to 60 * PSCHED_TICKS_PER_SEC */
long tokens, ctokens; /* current number of tokens */ /* rate / ceil tokens, initialised from buffer and cbuffer */
psched_time_t t_c; /* checkpoint time */
};
struct htb_sched {
/* every struct htb_class created by "tc class add ... htb" is inserted into this hash table
 (through htb_class->Qdisc_class_common->hnode, see htb_change_class -> qdisc_class_hash_insert),
 which ties htb_class and htb_sched together; clhash is initialised in qdisc_class_hash_init.
 Note that insertion into clhash does not build the RB trees; those are built at enqueue
 time via htb_enqueue -> htb_activate. */
struct Qdisc_class_hash clhash; /* e.g. "tc class add parent 2: classid 2:3 ..." inserts the class keyed by classid 2:3; the hash just stores classes by classid and their count */
/* cl->un.leaf.drop_list is linked into drops[cl->prio], see htb_activate */
struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
/* self list - roots of self generating tree */
/* one RB tree root per (level, priority), see htb_add_class_to_row */
struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; /* row holds the "green" classes, i.e. those that still have tokens left and can be scheduled directly */
int row_mask[TC_HTB_MAXDEPTH]; /* bitmask of which priority trees at each level are non-empty, see htb_add_class_to_row */
/* ptr is the DRR round-robin marker: it points at the class currently being serviced;
 when that class's deficit is used up, htb_next_rb_node() advances ptr to the next node
 and scheduling continues from there. */
struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; /* current class ptr per (level, prio) */
u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; /* classid of the last valid ptr (used when ptr is NULL) */
/* self wait list - roots of wait PQs per row */
/* wait_pq holds the "yellow" and "red" classes; at their event time they are re-checked to see whether they can go back to "green". */
struct rb_root wait_pq[TC_HTB_MAXDEPTH]; /* wait queues (per level) for classes whose bandwidth has exceeded the limit */
/* time of nearest event per level (row) */
psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
/* e.g. "tc qdisc add dev eth0 root handle 22: htb default N": default minor id of the class to which unclassified packets are sent {0} */
int defcls; /* class where unclassified flows go to */ /* skbs that no filter can map to a child class fall back to this class */
/* filters for qdisc itself */
struct tcf_proto *filter_list; /* in htb_find_tcf, filters that do not resolve to a class are attached to this root list */
/* DRR quantums are computed as rate in Bps/r2q {10}; the userspace default for r2q is 10 */
int rate2quantum; /* quant = rate / rate2quantum */ /* rate-to-quantum conversion factor, forced to at least 1 in htb_init */
psched_time_t now; /* cached dequeue time */
struct qdisc_watchdog watchdog;
/* non shaped skbs; let them go directly thru */
/* if an arriving skb matches no filter and the default class does not resolve either, it is queued on direct_queue, see htb_enqueue */
struct sk_buff_head direct_queue; /* whether an skb may be queued here is decided by comparing this queue's length with direct_qlen below; packets on this list are not shaped, see htb_dequeue */
int direct_qlen; /* max qlen of above */ /* q->direct_qlen = qdisc_dev(sch)->tx_queue_len; */
long direct_pkts; /* count of packets handled through the direct queue */
#define HTB_WARN_TOOMANYEVENTS 0x1
unsigned int warned; /* only one warning */
struct work_struct work;
};
/* find class in global hash table using given handle */ /* i.e. look up the HTB class for a classid */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
struct Qdisc_class_common *clc;
clc = qdisc_class_find(&q->clhash, handle);
if (clc == NULL)
return NULL;
return container_of(clc, struct htb_class, common);
}
/**
* htb_classify - classify a packet into class
*
* It returns NULL if the packet should be dropped or -1 if the packet
* should be passed directly thru. In all other cases leaf class is returned.
* We allow direct class selection by classid in priority. Then we examine
* filters in qdisc and in inner nodes (if higher filter points to the inner
* node). If we end up with classid MAJOR:0 we enqueue the skb into special
* internal fifo (direct). These packets then go directly thru. If we still
* have no valid leaf we try to use MAJOR:default leaf. If that is still
* unsuccessful, we finish and return the direct queue.
*/
/* HTB classification: map the packet to a class and act on the result.
 NULL means the packet was not matched (drop); -1 (HTB_DIRECT) means the packet bypasses classification.
 If no filter matches, the default class from "tc qdisc add ... htb default N" is tried. */
#define HTB_DIRECT (struct htb_class*)-1
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl;
struct tcf_result res;
struct tcf_proto *tcf;
int result;
/* allow to select class by setting skb->priority to valid classid;
note that nfmark can be used too by attaching filter fw with no
rules in it */
if (skb->priority == sch->handle) /* skb->priority equals the qdisc handle (X:0): direct flow */
return HTB_DIRECT; /* X:0 (direct flow) selected */
if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) /* skb->priority names an HTB leaf class: use it */
return cl;
/*
 No leaf corresponds to skb->priority directly; in practice skb->priority is usually 0,
 so most packets end up here.
*/
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
tcf = q->filter_list; /* filter chain */
/* walk the tc filter chain matching the skb; on a match tc_classify() returns >= 0 and stores the matched class in res */
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT /* kernel built with actions on classification results */
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
}
#endif
if ((cl = (void *)res.class) == NULL) { /* the filter did not return a class pointer */
if (res.classid == sch->handle) /* result is X:0: treat as direct flow */
return HTB_DIRECT; /* X:0 (direct flow) */
if ((cl = htb_find(res.classid, sch)) == NULL)
break; /* filter selected invalid classid */
}
if (!cl->level)
return cl; /* we hit leaf; return it */
/* we have got inner class; apply inner filter chain */
tcf = cl->filter_list;
}
/* we only get here when no class was found above */
/* classification failed; try to use default class */ /* look up the default classid to see whether a default class is configured */
cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
if (!cl || cl->level) /* the default class ("default N" of "tc qdisc add ... htb") does not exist or is not a leaf: fall back to HTB_DIRECT */
return HTB_DIRECT; /* bad default .. this is safe bet */
return cl;
}
/**
* htb_add_to_id_tree - adds class to the round robin list
*
* Routine adds class to the list (actually tree) sorted by classid.
* Make sure that class is not already on such list for given prio.
*/ /* insert class cl into the RB tree sorted by classid */
static void htb_add_to_id_tree(struct rb_root *root,
struct htb_class *cl, int prio)
{
struct rb_node **p = &root->rb_node, *parent = NULL;
/* the RB tree is ordered by classid: larger ids go right, smaller go left */
/* walk down the tree to find the insertion point for cl */
while (*p) { /* find the node that cl will be attached under */
struct htb_class *c;
parent = *p;
c = rb_entry(parent, struct htb_class, node[prio]);
if (cl->common.classid > c->common.classid)
p = &parent->rb_right;
else
p = &parent->rb_left;
}
/* standard RB tree link and rebalance */
rb_link_node(&cl->node[prio], parent, p);
rb_insert_color(&cl->node[prio], root);
}
/**
* htb_add_to_wait_tree - adds class to the event queue with delay
*
* The class is added to priority event queue to indicate that class will
* change its mode in cl->pq_key microseconds. Make sure that class is not
* already in the queue.
*//*
HTB dequeue path:
__qdisc_run -> qdisc_restart -> dequeue_skb -> htb_dequeue
htb_dequeue
  -> __skb_dequeue
  -> htb_do_events
     -> htb_safe_rb_erase
     -> htb_change_class_mode
     -> htb_add_to_wait_tree
  -> htb_dequeue_tree
     -> htb_lookup_leaf
     -> htb_deactivate
     -> q->dequeue
     -> htb_next_rb_node
     -> htb_charge_class
        -> htb_change_class_mode
        -> htb_safe_rb_erase
        -> htb_add_to_wait_tree
  -> htb_delay_by
*/
static void htb_add_to_wait_tree(struct htb_sched *q,
struct htb_class *cl, long delay)
{
struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
cl->pq_key = q->now + delay;
if (cl->pq_key == q->now)
cl->pq_key++;
/* update the nearest event cache */
if (q->near_ev_cache[cl->level] > cl->pq_key)
q->near_ev_cache[cl->level] = cl->pq_key;
while (*p) {
struct htb_class *c;
parent = *p;
c = rb_entry(parent, struct htb_class, pq_node);
if (cl->pq_key >= c->pq_key)
p = &parent->rb_right;
else
p = &parent->rb_left;
}
rb_link_node(&cl->pq_node, parent, p);
rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}
/**
* htb_next_rb_node - finds next node in binary tree
*
* When we are past last key we return NULL.
* Average complexity is 2 steps per call.
*/
/* HTB dequeue path: see the call-flow overview above htb_add_to_wait_tree. */
/* after this call *n points at the next node of the RB tree */
static inline void htb_next_rb_node(struct rb_node **n)
{
*n = rb_next(*n);
}
/**
* htb_add_class_to_row - add class to its row
*
* The class is added to row at priorities marked in mask.
* It does nothing if mask == 0.
*/
/* add cl to the RB tree rooted at q->row[cl->level] + prio (level cl->level, priority prio).
 There is one tree root per (level, priority); cl->level and the prio bits set in mask
 determine which row[][] roots in the htb_sched private data receive cl. */
static inline void htb_add_class_to_row(struct htb_sched *q,
struct htb_class *cl, int mask)
{
/* OR mask into this level's row_mask: those priorities now have data */
q->row_mask[cl->level] |= mask;
while (mask) {
int prio = ffz(~mask);
mask &= ~(1 << prio); /* iterate over the set bits of mask; e.g. for 0x23 the loop sees prio = 0, 1, 5 */
htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); /* q->row[cl->level] + prio is the root of the prio tree at level cl->level */
}
}
/* If this triggers, it is a bug in this code, but it need not be fatal */
/* HTB dequeue path: see the call-flow overview above htb_add_to_wait_tree. */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
if (RB_EMPTY_NODE(rb)) {
WARN_ON(1);
} else {
rb_erase(rb, root);
RB_CLEAR_NODE(rb);
}
}
/**
* htb_remove_class_from_row - removes class from its row
*
* The class is removed from row at priorities marked in mask.
* It does nothing if mask == 0.
*/
static inline void htb_remove_class_from_row(struct htb_sched *q,
struct htb_class *cl, int mask)
{
int m = 0;
while (mask) {
int prio = ffz(~mask); /* prio = position of the lowest set bit in mask */
mask &= ~(1 << prio); /* clear that bit */
/* if the qdisc's ptr for this level/prio points at cl's node, advance it to the next node first */
if (q->ptr[cl->level][prio] == cl->node + prio)
htb_next_rb_node(q->ptr[cl->level] + prio);
htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); /* unlink cl from the row tree */
if (!q->row[cl->level][prio].rb_node) /* if that tree is now empty, remember this prio */
m |= 1 << prio;
}
q->row_mask[cl->level] &= ~m; /* clear the row-mask bits of the priorities whose trees became empty */
}
/**
* htb_activate_prios - creates active class's feed chain
*
* The class is connected to ancestors and/or appropriate rows
* for priorities it is participating on. cl->cmode must be new
* (activated) mode. It does nothing if cl->prio_activity == 0.
*/
/* activation: build the packet feed chain for the class */
/* if cl->prio_activity is 0 this is a no-op, though as set up above it should not be 0 here */
/* this links cl into p->un.inner.feed[prio] and sets the prio bit in the parent's prio_activity,
 and so on up the tree, so the whole chain above cl becomes usable.
 See http://luxik.cdi.cz/~devik/qos/htb/manual/theory.htm and http://blog.chinaunix.net/uid-7220314-id-208698.html */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
struct htb_class *p = cl->parent;
long m, mask = cl->prio_activity; /* prio_activity is used as a bitmask; for a leaf only one bit is set */
/* loop only while the class is in HTB_MAY_BORROW mode: such classes can still be activated through their parents */
/* in most cases p and mask start out non-zero */
while (cl->cmode == HTB_MAY_BORROW && p && mask) { /* for diagrams see "Hierarchical token bucket theory", http://luxik.cdi.cz/~devik/qos/htb/manual/theory.htm */
m = mask; /* keep a working copy of mask */
while (m) {
/* ffz(~m) finds the lowest set bit of m; lower prio values rank higher and are served first */
int prio = ffz(~m); /* the while (m) loop thus visits every set bit of mask */
m &= ~(1 << prio); /* clear that bit */
/* p is a parent, so it always uses the inner structure, never leaf */
/* if the parent already has a feed tree for this prio, drop the bit from mask */
if (p->un.inner.feed[prio].rb_node)
/* parent already has its feed in use so that
reset bit in mask as parent is already ok */
mask &= ~(1 << prio);
/* insert this class into the parent's feed tree for prio */
htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
}
/* OR the remaining bits into the parent's prio_activity: a set bit means that priority now has
 data available; an inner class's mask is therefore the OR of its children's masks */
p->prio_activity |= mask;
/* move up one level: the current class becomes the parent, the parent becomes the grandparent */
cl = p;
p = cl->parent;
}
/* if cl is in HTB_CAN_SEND mode, add it to the appropriate row;
 note that by now cl may be an ancestor of the class we started with */
if (cl->cmode == HTB_CAN_SEND && mask)
htb_add_class_to_row(q, cl, mask);
}
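/*
 Worked example for htb_activate_prios (hypothetical hierarchy, for illustration only):
 a leaf 1:10 with prio 2 becomes active while it is in HTB_MAY_BORROW mode and its parent
 1:1 is in HTB_CAN_SEND mode. mask starts as 1 << 2; the leaf is linked into 1:1's
 un.inner.feed[2] and, assuming that tree was empty before, bit 2 stays in mask and is set
 in 1:1's prio_activity. Because 1:1 can send, the loop stops there and
 htb_add_class_to_row() puts 1:1 (not the leaf) into q->row[<1:1's level>][2], so dequeue
 reaches the leaf through its parent's feed tree.
*/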
/**
* htb_deactivate_prios - remove class from feed chain
*
* cl->cmode must represent old mode (before deactivation). It does
* nothing if cl->prio_activity == 0. Class is removed from all feed
* chains and rows.
*/
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
struct htb_class *p = cl->parent;
long m, mask = cl->prio_activity; /* the class's activity mask; if it is 0 this function is a no-op */
/* loop while the class is in HTB_MAY_BORROW mode; p and mask are usually non-zero here */
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
m = mask; /* work on a copy */
mask = 0; /* rebuild mask with the priorities that become empty */
while (m) {
int prio = ffz(~m); /* lowest set bit of m (first zero bit of ~m) */
m &= ~(1 << prio); /* clear it */
if (p->un.inner.ptr[prio] == cl->node + prio) { /* the parent is currently servicing cl's node for this prio */
/* we are removing child which is pointed to from
parent feed - forget the pointer but remember
classid */
p->un.inner.last_ptr_id[prio] = cl->common.classid; /* remember cl's classid in last_ptr_id for this prio */
p->un.inner.ptr[prio] = NULL;
}
htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); /* unlink cl from the parent's feed tree for this prio */
if (!p->un.inner.feed[prio].rb_node) /* remember priorities whose feed tree became empty */
mask |= 1 << prio;
}
p->prio_activity &= ~mask; /* clear the emptied priorities from the parent's activity mask */
cl = p; /* move up one level */
p = cl->parent;
}
if (cl->cmode == HTB_CAN_SEND && mask) /* cl can send (neither blocked nor borrowing): unlink it from the relevant row trees */
htb_remove_class_from_row(q, cl, mask);
}
static inline long htb_lowater(const struct htb_class *cl)
{
if (htb_hysteresis)
return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
else
return 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
if (htb_hysteresis)
return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
else
return 0;
}
/**
* htb_class_mode - computes and returns current class mode
*
* It computes cl's mode at time cl->t_c+diff and returns it. If mode
* is not HTB_CAN_SEND then cl->pq_key is updated to time difference
* from now to time when cl will change its state.
* It is also worth noting that the class mode doesn't change simply
* at cl->{c,}tokens == 0; rather, there can be hysteresis in the
* 0 .. -cl->{c,}buffer range. It is meant to limit the number of
* mode transitions per time unit. The speed gain is about 1/6.
*/
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
long toks;
if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { /* ceil tokens below the low-water mark */
*diff = -toks;
return HTB_CANT_SEND;
}
/* compute the normal (rate) tokens; if they are at or above the high-water mark the class may send */
if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
return HTB_CAN_SEND;
*diff = -toks;
return HTB_MAY_BORROW; /* otherwise the class may only borrow */
}
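/*
 Numeric illustration (made-up token values; hysteresis off, so lowater = hiwater = 0):
 with cl->ctokens = -3000 and *diff = 1000, toks = -2000 < 0, so the mode is HTB_CANT_SEND
 and *diff is set to 2000, the time the class must still wait before its ceil bucket reaches
 zero again; htb_add_to_wait_tree() later uses that value as the pq_key delay. If instead
 ctokens + *diff >= 0 but tokens + *diff < 0, the class ends up in HTB_MAY_BORROW.
*/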
/**
* htb_change_class_mode - changes classe's mode
*
* This should be the only way to change a class's mode under normal
* circumstances. The routine will update feed list linkage, change mode
* and add class to the wait event queue if appropriate. New mode should
* be different from old one and cl->pq_key has to be valid if changing
* to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
*/
/* HTB dequeue path: see the call-flow overview above htb_add_to_wait_tree. */
/* adjust the class's send mode */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) /* switch the class mode; htb_activate_prios re-links the row and feed trees as needed */
{
enum htb_cmode new_mode = htb_class_mode(cl, diff); /* compute the new mode from the token delta */
if (new_mode == cl->cmode) /* mode unchanged, nothing to do */
return;
if (cl->prio_activity) { /* not necessary: speed optimization */ /* non-zero prio_activity means the class is active and must be unlinked before the mode switch */
if (cl->cmode != HTB_CANT_SEND) /* the old mode allowed sending or borrowing, so the class sits in the feed/row trees: remove it first */
htb_deactivate_prios(q, cl);
cl->cmode = new_mode;
if (new_mode != HTB_CANT_SEND) /* the new mode allows sending or borrowing: re-activate */
htb_activate_prios(q, cl);
} else /* inactive class: just update the mode */
cl->cmode = new_mode;
}
/**
* htb_activate - inserts leaf cl into appropriate active feeds
*
* Routine learns (new) priority of leaf and activates feed chain
* for the prio. It can be called on already active leaf safely.
* It also adds leaf into droplist.
*/
/* activate the class so it acts as a packet feeder; the feeders form an ordered set implemented as RB trees.
 Classes created by htb_change_class only sit in htb_sched->clhash; the RB trees are built here, from htb_enqueue -> htb_activate. */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
if (!cl->prio_activity) { /* only act when prio_activity is 0; non-zero means the leaf is already active */
/* prio_activity is derived from the leaf's prio: at least 1, at most 1 << 7, never 0 */
cl->prio_activity = 1 << cl->prio;
htb_activate_prios(q, cl); /* do the actual activation */
list_add_tail(&cl->un.leaf.drop_list,
q->drops + cl->prio); /* add the leaf to the drop list of its priority */
}
}
/**
* htb_deactivate - remove leaf cl from active feeds
*
* Make sure that leaf is active. In the other words it can't be called
* with non-active leaf. It also removes class from the drop list.
*/
/* HTB dequeue path: see the call-flow overview above htb_add_to_wait_tree. */
/* remove leaf class cl from the active feed trees */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
WARN_ON(!cl->prio_activity);
htb_deactivate_prios(q, cl); /* tear down the feed chain */
cl->prio_activity = 0; /* clear the activity mask */
list_del_init(&cl->un.leaf.drop_list);
}
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
int uninitialized_var(ret);
struct htb_sched *q = qdisc_priv(sch);
/* if a filter maps the skb to a leaf class, the intermediate levels of the qdisc tree are skipped and
 the skb is enqueued directly at that leaf. Reference: <HTB介绍以及使用.doc> */
struct htb_class *cl = htb_classify(skb, sch, &ret); /* enqueue starts with classification to decide which queue the packet goes to */
if (cl == HTB_DIRECT) { /* no filter matched and the default class ("default N" of "tc qdisc add ... htb") did not resolve: enqueue on htb_sched->direct_queue */
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen) {
__skb_queue_tail(&q->direct_queue, skb);
q->direct_pkts++;
} else {
kfree_skb(skb);
sch->qstats.drops++;
return NET_XMIT_DROP;
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
#endif
} else { /* the skb matched a filter or resolved to the default class */
if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { /* enqueue into the leaf class's own qdisc (pfifo_qdisc_ops by default for HTB) */
if (net_xmit_drop_count(ret)) { /* enqueue failed */
sch->qstats.drops++;
cl->qstats.drops++;
}
return ret;
} else { /* enqueue succeeded; the packet now sits on the leaf's classless qdisc (pfifo, pfifo_fast, ...) */
cl->bstats.packets +=
skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
/*
 Activate the HTB class and build its feed chain so dequeue can pull packets from it.
 The chain is only linked through parents while the class may send or borrow; a blocked
 class is not linked into the row trees.
*/
cl->bstats.bytes += qdisc_pkt_len(skb);
htb_activate(q, cl);
}
}
sch->q.qlen++;
sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
sch->bstats.bytes += qdisc_pkt_len(skb);
return NET_XMIT_SUCCESS;
}
static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
{
long toks = diff + cl->tokens;
if (toks > cl->buffer)
toks = cl->buffer;
toks -= (long) qdisc_l2t(cl->rate, bytes);
if (toks <= -cl->mbuffer)
toks = 1 - cl->mbuffer;
cl->tokens = toks;
}
static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
{
long toks = diff + cl->ctokens;
if (toks > cl->cbuffer)
toks = cl->cbuffer;
toks -= (long) qdisc_l2t(cl->ceil, bytes);
if (toks <= -cl->mbuffer)
toks = 1 - cl->mbuffer;
cl->ctokens = toks;
}
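/*
 Rough illustration of the token accounting above (all numbers made up; real values come
 from the psched clock and the class rate tables): suppose cl->buffer = 10000, cl->tokens =
 2000 and diff = 5000 ticks have elapsed, so toks = 7000 (it would have been capped at
 cl->buffer had it exceeded 10000). If qdisc_l2t(cl->rate, bytes) prices this packet at
 6000 ticks, the class is left with cl->tokens = 1000. Had the price exceeded
 toks + cl->mbuffer, tokens would be clamped to 1 - cl->mbuffer, so a long-idle class
 cannot run up an unbounded debt.
*/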
/**
* htb_charge_class - charges amount "bytes" to leaf and ancestors
*
* Routine assumes that packet "bytes" long was dequeued from leaf cl
* borrowing from "level". It accounts bytes to ceil leaky bucket for
* leaf and all ancestors and to rate bucket for ancestors at levels
* "level" and higher. It also handles possible change of mode resulting
* from the update. Note that mode can also increase here (MAY_BORROW to
* CAN_SEND) because we can use more precise clock that event queue here.
* In such case we remove class from event queue first.
*/
/* HTB dequeue path: see the call-flow overview above htb_add_to_wait_tree. */
/* update the tokens of class cl and all of its ancestors and adjust their cmode;
 htb_change_class_mode / htb_activate_prios re-link the row and feed trees as needed */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
int level, struct sk_buff *skb)
{
int bytes = qdisc_pkt_len(skb);
enum htb_cmode old_mode;
long diff;
/* walk up towards the root */
while (cl) {
/* time elapsed since the last checkpoint, bounded by mbuffer */
diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
/* classes at or above the level we borrowed from are charged against their own rate */
if (cl->level >= level) {
if (cl->level == level) /* this is the level that lent the bandwidth */
cl->xstats.lends++;
/* account normal (rate) tokens */
htb_accnt_tokens(cl, bytes, diff);
} else {
/* classes below the lending level borrowed;
 just refill their rate tokens for the elapsed time */
cl->xstats.borrows++;
cl->tokens += diff; /* we moved t_c; update tokens */
}
htb_accnt_ctokens(cl, bytes, diff); /* always account ceil tokens */
cl->t_c = q->now;
old_mode = cl->cmode; /* remember the old mode */
diff = 0;
/* recompute the mode from the new token counts; diff is passed as 0 because the elapsed
 time has already been folded into the tokens above. If the resulting mode is not
 HTB_CAN_SEND, diff comes back holding minus the current token count, i.e. the time to wait. */
htb_change_class_mode(q, cl, &diff); /* switch the class mode; htb_activate_prios re-links the row and feed trees as needed */
if (old_mode != cl->cmode) { /* the mode changed */
if (old_mode != HTB_CAN_SEND) /* the old mode was not HTB_CAN_SEND, so the class sits in a wait tree: remove it */
htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
if (cl->cmode != HTB_CAN_SEND)
htb_add_to_wait_tree(q, cl, diff); /* the new mode is not HTB_CAN_SEND: hang the class on the appropriate wait tree */
}
/* update byte stats except for leaves which are already updated */
if (cl->level) { /* inner classes update their byte/packet stats here; leaves were already updated at enqueue time */
cl->bstats.bytes += bytes;
cl->bstats.packets += skb_is_gso(skb)?
skb_shinfo(skb)->gso_segs:1;
}
cl = cl->parent;
}
}
/**
* htb_do_events - make mode changes to classes at the level
*
* Scans event queue for pending events and applies them. Returns time of
* next pending event (0 for no event in pq, q->now for too many events).
* Note: Applied are events whose cl->pq_key <= q->now.
*/
/* HTB dequeue path: see the call-flow overview above htb_add_to_wait_tree. */
/* adjust the mode of the classes sitting on the level-th wait tree */
static psched_time_t htb_do_events(struct htb_sched *q, int level,
unsigned long start)
{
/* don't run for longer than 2 jiffies; 2 is used instead of
1 to simplify things when jiffy is going to be incremented
too soon */