1. 前言
以太头中除了6字节目的MAC地址、6字节源MAC地址外,还有两字节的以太帧类型值,如IPv4为0x0800,ARP为0x0806等,网卡驱动收到以太帧后通过接口函数netif_receive_skb()(netif_rx实际最后也是调用netif_receive_skb)交到上层,而这个接口函数就完成对以太帧类型的区分,交到不同的协议处理程序。如果想自己编写某一以太类型帧的处理程序,需要自己添加相应的代码。以下为Linux内核2.6代码。
2. 数据结构
每种协议都要定义一个packet_type结构,引导进入相关的协议数据处理函数。所有节点都挂接在链表上:处理全部以太类型(ETH_P_ALL)的节点挂到ptype_all链表,其余节点按以太类型取HASH后挂到ptype_base[]中对应的HASH链表。
/* include/linux/netdevice.h */
/*
 * Describes one registered receive handler for a given Ethernet frame type.
 * Instances are linked into the kernel's handler lists via @list.
 */
struct packet_type {
	__be16 type;			/* This is really htons(ether_type). */
	struct net_device *dev;		/* NULL is wildcarded here */
	/*
	 * Receive entry point. deliver_skb() calls it as
	 * func(skb, skb->dev, pt_prev, orig_dev).
	 */
	int (*func) (struct sk_buff *,
		     struct net_device *,
		     struct packet_type *,
		     struct net_device *);
	void *af_packet_priv;		/* presumably handler-private data; not used in the code shown here */
	struct list_head list;		/* links this entry into ptype_all or a ptype_base[] bucket */
};
参数说明:
type:以太帧类型,16位。
dev:所附着的网卡设备,如果为NULL则匹配全部网卡。
func:协议入口接收处理函数。
af_packet_priv:协议私有数据。
list:链表节点,用于将本结构挂接到ptype_all或ptype_base[]链表中。
一般各协议的packet_type结构都是静态存在,初始化时只提供type和func两个参数就可以了,每个协议在初始化时都要将此结构加入到系统类型链表中。
3. 处理函数
3.1 添加节点
/* net/core/dev.c */
/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */
void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL)) {
		/* Handlers for every Ethernet type live on the ptype_all list. */
		netdev_nit++;
		list_add_rcu(&pt->list, &ptype_all);
	} else {
		/*
		 * The low four bits of the host-order type select the bucket,
		 * so there are 16 hash lists (indices 0..15).
		 */
		hash = ntohs(pt->type) & 15;
		/*
		 * RCU-aware insert: readers walk these lists with
		 * list_for_each_entry_rcu() and do not take ptype_lock.
		 */
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
3.2 删除节点
/**
* __dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is unlinked
* from the kernel lists while ptype_lock is held.
*
* The packet type might still be in use by receivers
* and must not be freed until after all the CPU's have gone
* through a quiescent state.
*
* If @pt is not found on the expected list, a warning is printed and
* nothing is unlinked.
*/
void __dev_remove_pack(struct packet_type *pt)
{
struct list_head *head;
struct packet_type *pt1;
spin_lock_bh(&ptype_lock);
// Pick the list the entry should be on: ptype_all for ETH_P_ALL,
// otherwise the ptype_base[] bucket selected by the low four bits of the type.
// NOTE(review): netdev_nit is decremented before the lookup, so it is
// decremented even when pt turns out not to be on the list -- confirm
// whether that is intended.
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit--;
head = &ptype_all;
} else
head = &ptype_base[ntohs(pt->type) & 15];
// Match by node address rather than by type, because several handlers
// may be registered for the same type.
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
list_del_rcu(&pt->list);
goto out;
}
}
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink the entry; this step alone does not wait for readers. */
	__dev_remove_pack(pt);
	/*
	 * More than a plain wrapper: wait here so that, per the contract
	 * above, no CPU is still looking at @pt when we return.
	 */
	synchronize_net();
}
4. 实例
4.1 IP
/* net/ipv4/af_inet.c */
/* Handler registration for IPv4 frames (ETH_P_IP); dev is left NULL, i.e. wildcarded. */
static struct packet_type ip_packet_type = {
.type = __constant_htons(ETH_P_IP),
.func = ip_rcv, // entry point for received IP packets
};
static int __init inet_init(void)
{
......
dev_add_pack(&ip_packet_type);
......
由于IP协议部分不能作为内核模块,所以是没有卸载函数的,没必要调用dev_remove_pack()函数。
4.2 8021q vlan
/* net/8021q/vlan.c */
/* Handler registration for 802.1Q frames (ETH_P_8021Q); dev is left NULL, i.e. wildcarded. */
static struct packet_type vlan_packet_type = {
.type = __constant_htons(ETH_P_8021Q),
.func = vlan_skb_recv, /* VLAN receive method */
};
......
static int __init vlan_proto_init(void)
{
......
dev_add_pack(&vlan_packet_type);
......
static void __exit vlan_cleanup_module(void)
{
......
dev_remove_pack(&vlan_packet_type);
......
由于VLAN可为模块方式存在,所以在模块清除函数中要调用dev_remove_pack()。
5. 网络接收
网卡驱动收到数据包并构造出skb后,通过接口函数netif_receive_skb()将其传递到上层,由该函数按协议类型分发给相应的协议处理函数。
/* net/core/dev.c */
int netif_receive_skb(struct sk_buff *skb)
{
......
// 先查处理所有以太类型的链表各节点
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
......
// 再查指定协议的HASH链表
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
......
/*
 * Hand @skb to the receive function of the handler @pt_prev and return
 * that function's result. This is where the packet enters the protocol
 * (layer-3) handler registered for it.
 */
static __inline__ int deliver_skb(struct sk_buff *skb,
				  struct packet_type *pt_prev,
				  struct net_device *orig_dev)
{
	/*
	 * Bump skb->users (presumably the skb's reference count) before
	 * passing the skb to the handler.
	 */
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
6. 结论
通过链表挂接方式,Linux内核可以很容易的添加各种协议的接收处理函数。
数据流程:
网卡驱动 -> netif_rx() -> netif_receive_skb() -> deliver_skb() -> packet_type.func