赵占旭的博客

[转]Cisco VPP Ethernet Node分析

注:本文是转载,但不是100%的转载,可能稍微有些出入。原文地址:http://zhaozhanxu.com/2016/11/10/VPP/2016-11-10-VPP-Ethernet-Node/

核心函数


ethernet_input_init

初始化函数,主循环之前会调用。

// Node init function, called once before the main loop starts.
static clib_error_t *ethernet_input_init (vlib_main_t * vm)
{
// Supports the VLAN and QinQ protocols.
ethernet_main_t *em = &ethernet_main;
__attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
__attribute__ ((unused)) qinq_table_t *invalid_qinq_table;

// Only assigns format_buffer / unformat_buffer for the node.
// Noteworthy: it also initializes packet-generator (pg) support;
// the basic protocols each have their own pg implementation.
ethernet_setup_node (vm, ethernet_input_node.index);
ethernet_setup_node (vm, ethernet_input_type_node.index);
ethernet_setup_node (vm, ethernet_input_not_l2_node.index);

// Initialize the sparse_vec used to choose the next node from the
// L3 protocol (ethertype).
next_by_ethertype_init (&em->l3_next);

...
}

ethernet_input_inline

完成了该node业务逻辑功能。

// Core business logic of the ethernet input node: classify each buffer's
// L2 encapsulation, resolve the receiving (sub)interface, update per-
// subinterface RX counters, and enqueue to the appropriate next node.
static_always_inline uword ethernet_input_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame,
ethernet_input_variant_t variant)
{
u32 cpu_index = os_get_cpu_number ();

/*
 * Under all three modes -- ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
 * ETHERNET_INPUT_VARIANT_NOT_L2 and ETHERNET_INPUT_VARIANT_ETHERNET --
 * the error strings of ethernet_input_node are shared.
 * (Original author's note: no special significance apparent here.)
 */
if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
else
error_node = node;

// Start address of the packet-descriptor area at the tail of the frame.
from = vlib_frame_vector_args (from_frame);
// Number of packets in the frame.
n_left_from = from_frame->n_vectors;

// Speculatively reuse the previous frame's next index; it is corrected
// later if the guess turns out to be wrong.
next_index = node->cached_next_index;
stats_sw_if_index = node->runtime_data[0];

while (n_left_from > 0)
{
// Grab buffer space in the frame handed to the next node.
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

while (n_left_from >= 4 && n_left_to_next >= 2)
{
// Dual loop: process two packets while prefetching the next two.
// Omitted here -- the single-packet loop below uses the same method.
...
}

while (n_left_from > 0 && n_left_to_next > 0)
{
// Prefetch the next packet.
if (n_left_from > 1)
{
vlib_buffer_t *p2;

p2 = vlib_get_buffer (vm, from[1]);
vlib_prefetch_buffer_header (p2, STORE);
CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
}

bi0 = from[0];
to_next[0] = bi0;
from += 1;
to_next += 1;
n_left_from -= 1;
n_left_to_next -= 1;

b0 = vlib_get_buffer (vm, bi0);

error0 = ETHERNET_ERROR_NONE;

// Parse the L2 header, unwrapping any stacked encapsulations;
// on return vlib_buffer_t->current_data points at the L3 header.
parse_header (variant, b0, &type0,
&orig_type0, &outer_id0, &inner_id0, &match_flags0);

old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

eth_vlan_table_lookups (em,
vnm, old_sw_if_index0, orig_type0, outer_id0, inner_id0,
&hi0, &main_intf0, &vlan_intf0, &qinq_intf0);

identify_subint (hi0, b0, match_flags0, main_intf0, vlan_intf0,
qinq_intf0, &new_sw_if_index0, &error0, &is_l20);

// Save RX sw_if_index for later nodes
vnet_buffer (b0)->sw_if_index[VLIB_RX] =
error0 != ETHERNET_ERROR_NONE ?
old_sw_if_index0 : new_sw_if_index0;

/*
 * Increment subinterface stats
 * Note that interface-level counters have already been incremented
 * prior to calling this function. Thus only subinterface counters
 * are incremented here.
 * Interface level counters include packets received on the main
 * interface and all subinterfaces. Subinterface level counters
 * include only those packets received on that subinterface
 * Increment stats if the subint is valid and it is not the main intf
 * Note (original author): a lot of VPP code executes the predicted
 * path first and patches the result up afterwards, which may help
 * the pipeline -- worth studying in detail later.
 */
if ((new_sw_if_index0 != ~0) && (new_sw_if_index0 != old_sw_if_index0))
{
len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
- vnet_buffer (b0)->ethernet.start_of_ethernet_header;

stats_n_packets += 1;
stats_n_bytes += len0;

// Batch stat increments from the same subinterface so counters
// don't need to be incremented for every packet.
if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
{
stats_n_packets -= 1;
stats_n_bytes -= len0;

if (new_sw_if_index0 != ~0)
vlib_increment_combined_counter
(vnm->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
cpu_index, new_sw_if_index0, 1, len0);
if (stats_n_packets > 0)
{
vlib_increment_combined_counter
(vnm->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
cpu_index, stats_sw_if_index,
stats_n_packets, stats_n_bytes);
stats_n_packets = stats_n_bytes = 0;
}
stats_sw_if_index = new_sw_if_index0;
}
}

if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
is_l20 = 0;

// Decide the next node; depending on configuration, the next node
// can be chosen per L3 protocol.
determine_next_node (em, variant, is_l20, type0, b0, &error0, &next0);

b0->error = error_node->errors[error0];

// verify speculative enqueue
// Fix up this packet's next node if the speculation was wrong.
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
}

vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}

// Increment any remaining batched stats
if (stats_n_packets > 0)
{
vlib_increment_combined_counter
(vnm->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
node->runtime_data[0] = stats_sw_if_index;
}

return from_frame->n_vectors;
}

以下几个函数用于hook该node,替代determine_next_node中默认的下一跳node挑选机制。
默认有ETHERNET_TYPE_IP4、ETHERNET_TYPE_IP6、ETHERNET_TYPE_MPLS_UNICAST三种协议,用户可以自己添加更多协议。可以根据协议来做不同下一跳。

/*
 * Register NODE_INDEX as the receiver of ethernet frames whose ethertype
 * is TYPE. The arc is added on all three ethernet input variants so the
 * next-index numbering stays identical across them.
 */
void ethernet_register_input_type (vlib_main_t * vm,
				   ethernet_type_t type, u32 node_index)
{
  ethernet_main_t *em = &ethernet_main;
  ethernet_type_info_t *ti = ethernet_get_type_info (em, type);
  u32 arc_index;

  ti->node_index = node_index;
  ti->next_index =
    vlib_node_add_next (vm, ethernet_input_node.index, node_index);

  /* All three variants must hand out the same next index. */
  arc_index =
    vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
  ASSERT (arc_index == ti->next_index);

  arc_index =
    vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
  ASSERT (arc_index == ti->next_index);

  // Add the L3 node for this ethertype to the next nodes structure
  next_by_ethertype_register (&em->l3_next, type, ti->next_index);

  // Call the registration functions for other nodes that want a mapping
  l2bvi_register_input_type (vm, type, node_index);
}

vlan包下一跳判定机制,可以还原ethernet头部,根据这里注册的值作跳转。

/*
 * Register NODE_INDEX as the next node for L2 (non-routed) ethernet
 * frames; the chosen arc index is cached in em->l2_next.
 */
void ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
{
  ethernet_main_t *em = &ethernet_main;
  u32 arc_index;

  em->l2_next =
    vlib_node_add_next (vm, ethernet_input_node.index, node_index);

  /*
   * These arcs may never be used, but every ethernet input variant
   * must agree on the next-index numbering.
   */
  arc_index =
    vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
  ASSERT (arc_index == em->l2_next);

  arc_index =
    vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
  ASSERT (arc_index == em->l2_next);
}

调用该函数后,大多数下一跳基本就由这里注册的值决定了。

// Register a next node for L3 redirect, and enable L3 redirect
void ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
{
  ethernet_main_t *em = &ethernet_main;
  u32 i;

  em->redirect_l3 = 1;
  em->redirect_l3_next = vlib_node_add_next (vm,
					     ethernet_input_node.index,
					     node_index);
  // Change the cached next nodes to the redirect node
  em->l3_next.input_next_ip4 = em->redirect_l3_next;
  em->l3_next.input_next_ip6 = em->redirect_l3_next;
  em->l3_next.input_next_mpls = em->redirect_l3_next;

  /*
   * Even if we never use these arcs, we have to align the next indices
   * on ALL ethernet input variants -- the original only aligned the
   * "type" variant; the "not-l2" variant must match too, mirroring
   * ethernet_register_input_type / ethernet_register_l2_input above.
   */
  i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
  ASSERT (i == em->redirect_l3_next);

  i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
  ASSERT (i == em->redirect_l3_next);
}

注意:所有文章非特别说明皆为原创。为保证信息与源同步,转载时请务必注明文章出处!谢谢合作 :-)

原始链接:http://zhaozhanxu.com/2016/11/10/VPP/2016-11-10-VPP-Ethernet-Node/

许可协议: "署名-非商用-相同方式共享 4.0" 转载请保留原文链接及作者。