TCP/IP学习(30)——L2数据链路层的数据包处理详细流程
原文地址:TCP/IP学习(30)——L2数据链路层的数据包处理详细流程 作者:GFree_Wind
作者:gfree.wind@gmail.com
博客:linuxfocus.blog.chinaunix.net
- static int __init net_dev_init(void) /* returns 0 once every step succeeded, -ENOMEM if any checked step fails */
- {
- int i, rc = -ENOMEM;
- BUG_ON(!dev_boot_phase);
- if (dev_proc_init())
- goto out;
- if (netdev_kobject_init())
- goto out;
- INIT_LIST_HEAD(&ptype_all);
- for (i = 0; i < PTYPE_HASH_SIZE; i++)
- INIT_LIST_HEAD(&ptype_base[i]);
- if (register_pernet_subsys(&netdev_net_ops))
- goto out;
- /*
- * Initialise the packet receive queues.
- */
- /*
- Walk every possible CPU and set up its per-CPU softnet_data instance
- (the loop body is omitted in this excerpt). According to the article
- author, softnet_data acts as that CPU's receive queue.
- */
- for_each_possible_cpu(i) {
- struct softnet_data *sd = &per_cpu(softnet_data, i);
- ...... ......
- }
- dev_boot_phase = 0;
- /* The loopback device is special if any other network devices
- * is present in a network namespace the loopback device must
- * be present. Since we now dynamically allocate and free the
- * loopback device ensure this invariant is maintained by
- * keeping the loopback device as the first device on the
- * list of network devices. Ensuring the loopback devices
- * is the first device that appears and the last network device
- * that disappears.
- */
- if (register_pernet_device(&loopback_net_ops))
- goto out;
- if (register_pernet_device(&default_device_ops))
- goto out;
- open_softirq(NET_TX_SOFTIRQ, net_tx_action); /* net_tx_action handles NET_TX_SOFTIRQ */
- open_softirq(NET_RX_SOFTIRQ, net_rx_action); /* net_rx_action handles NET_RX_SOFTIRQ */
- hotcpu_notifier(dev_cpu_callback, 0);
- dst_init();
- dev_mcast_init();
- rc = 0;
- out:
- return rc;
- }
- static const struct net_device_ops e1000_netdev_ops = { /* net_device_ops table of the e1000 driver (excerpt) */
- .ndo_open = e1000_open, /* e1000_open is installed as the open callback */
- ...... ......
- };
- static irqreturn_t e1000_intr(int irq, void *data) /* e1000 interrupt handler (excerpt); this visible path returns IRQ_HANDLED */
- {
- ...... ......
- /*
- Check whether NAPI may be scheduled.
- Per the article author: this succeeds when NAPI is not disabled and no NAPI
- instance for this NIC is already running (so at most one instance runs per NIC).
- NAPI is a packet-processing scheme that is essentially interrupt + poll.
- */
- if (likely(napi_schedule_prep(&adapter->napi))) {
- /*
- Clear the per-run counters.
- It may look odd that "total" counters are zeroed here. Per the article author,
- they only cover a single NAPI run, not the lifetime of the NIC; the NIC-wide
- statistics live in netdev->stats, and the values below are added to them once
- the NAPI run has finished.
- */
- adapter->total_tx_bytes = 0;
- adapter->total_tx_packets = 0;
- adapter->total_rx_bytes = 0;
- adapter->total_rx_packets = 0;
- /* Request that this adapter's NAPI instance be scheduled */
- __napi_schedule(&adapter->napi);
- } else {
- /* this really should not happen; if it does it is basically a
- * bug, but not a hard error, so enable ints and continue */
- if (!test_bit(__E1000_DOWN, &adapter->flags))
- e1000_irq_enable(adapter);
- }
- return IRQ_HANDLED;
- }
- static void net_rx_action(struct softirq_action *h) /* polls the NAPI instances queued on this CPU's softnet_data poll_list */
- {
- struct softnet_data *sd = &__get_cpu_var(softnet_data);
- unsigned long time_limit = jiffies + 2;
- int budget = netdev_budget;
- void *have;
- local_irq_disable();
- while (!list_empty(&sd->poll_list)) {
- struct napi_struct *n;
- int work, weight;
- /* If softirq window is exhausted then punt.
- * Allow this to run for 2 jiffies, which will allow
- * an average latency of 1.5/HZ.
- */
- if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
- goto softnet_break;
- local_irq_enable();
- /* Even though interrupts have been re-enabled, this
- * access is safe because interrupts can only add new
- * entries to the tail of this list, and only ->poll()
- * calls can remove this head entry from the list.
- */
- /* Take the NAPI instance at the head of the poll list */
- n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
- have = netpoll_poll_lock(n);
- weight = n->weight;
- /* This NAPI_STATE_SCHED test is for avoiding a race
- * with netpoll's poll_napi(). Only the entity which
- * obtains the lock and sees NAPI_STATE_SCHED set will
- * actually make the ->poll() call. Therefore we avoid
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
- /* Poll this instance, passing its weight as the limit */
- work = n->poll(n, weight);
- trace_napi_poll(n);
- }
- WARN_ON_ONCE(work > weight);
- budget -= work; /* charge the work done against the overall budget */
- local_irq_disable();
- /* Drivers must not modify the NAPI state if they
- * consume the entire weight. In such cases this code
- * still "owns" the NAPI instance and therefore can
- * move the instance around on the list at-will.
- */
- if (unlikely(work == weight)) {
- /* This instance used up its whole weight; move on to the next one */
- if (unlikely(napi_disable_pending(n))) {
- local_irq_enable();
- napi_complete(n);
- local_irq_disable();
- } else
- list_move_tail(&n->poll_list, &sd->poll_list);
- }
- netpoll_poll_unlock(have);
- }
- out:
- net_rps_action_and_irq_enable(sd);
- #ifdef CONFIG_NET_DMA
- /*
- * There may not be any more sk_buffs coming right now, so push
- * any pending DMA copies to hardware
- */
- dma_issue_pending_all();
- #endif
- return;
- softnet_break:
- sd->time_squeeze++; /* count the early exit caused by budget or time limit */
- __raise_softirq_irqoff(NET_RX_SOFTIRQ); /* raise NET_RX_SOFTIRQ again for the entries still queued */
- goto out;
- }
- static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, /* excerpt: walks completed RX descriptors and passes each skb to e1000_receive_skb() */
- struct e1000_rx_ring *rx_ring,
- int *work_done, int work_to_do)
- {
- ...... ......
- i = rx_ring->next_to_clean;
- rx_desc = E1000_RX_DESC(*rx_ring, i);
- buffer_info = &rx_ring->buffer_info[i];
- while (rx_desc->status & E1000_RXD_STAT_DD) {
- struct sk_buff *skb;
- u8 status;
- if (*work_done >= work_to_do) // stop once work_to_do packets have been handled
- break;
- (*work_done)++;
- rmb(); /* read descriptor and rx_buffer_info after status DD */
- status = rx_desc->status;
- skb = buffer_info->skb;
- buffer_info->skb = NULL;
- /*
- Store the value returned by eth_type_trans() in skb->protocol.
- Per the article author, eth_type_trans() also sets skb->pkt_type
- (PACKET_BROADCAST etc.); that is not visible in this excerpt.
- */
- skb->protocol = eth_type_trans(skb, netdev);
- e1000_receive_skb(adapter, status, rx_desc->special, skb);
- next_desc:
- /* Advance to the next packet (code omitted in this excerpt) */
- ...... ......
- }
- ...... ......
- return cleaned;
- }
- static int __netif_receive_skb(struct sk_buff *skb) /* excerpt: hands skb to the packet_type handlers matching skb->protocol; returns a NET_RX_* code */
- {
- struct packet_type *ptype, *pt_prev;
- rx_handler_func_t *rx_handler;
- struct net_device *orig_dev;
- struct net_device *master;
- struct net_device *null_or_orig;
- struct net_device *orig_or_bond; /* NOTE(review): no assignment is visible here; presumably set in the omitted code below - confirm */
- int ret = NET_RX_DROP;
- __be16 type;
- if (!netdev_tstamp_prequeue)
- net_timestamp_check(skb);
- if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
- return NET_RX_SUCCESS;
- /* if we've gotten here through NAPI, check netpoll */
- if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
- if (!skb->skb_iif)
- skb->skb_iif = skb->dev->ifindex;
- /*
- * bonding note: skbs received on inactive slaves should only
- * be delivered to pkt handlers that are exact matches. Also
- * the deliver_no_wcard flag will be set. If packet handlers
- * are sensitive to duplicate packets these skbs will need to
- * be dropped at the handler. The vlan accel path may have
- * already set the deliver_no_wcard flag.
- */
- /* NIC bonding handling follows; the article author only has a passing knowledge of this feature and skips it */
- null_or_orig = NULL;
- orig_dev = skb->dev;
- master = ACCESS_ONCE(orig_dev->master);
- if (skb->deliver_no_wcard)
- null_or_orig = orig_dev;
- else if (master) {
- if (skb_bond_should_drop(skb, master)) {
- skb->deliver_no_wcard = 1;
- null_or_orig = orig_dev; /* deliver only exact match */
- } else
- skb->dev = master;
- }
- __this_cpu_inc(softnet_data.processed);
- /* Reset the network (L3) and transport (L4) header positions */
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- /* Length of the L2 (MAC) header: distance from mac_header to network_header */
- skb->mac_len = skb->network_header - skb->mac_header;
- pt_prev = NULL;
- rcu_read_lock();
- /*
- Some less relevant code is omitted here.
- */
- ...... ......
- /*
- Use the protocol type stored in skb->protocol as the hash key
- to select the matching ptype_base list.
- */
- type = skb->protocol;
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type && (ptype->dev == null_or_orig ||
- ptype->dev == skb->dev || ptype->dev == orig_dev ||
- ptype->dev == orig_or_bond)) {
- if (pt_prev) // a handler matched earlier: deliver to it first, then remember this match
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
- }
- if (pt_prev) {
- ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); /* the last matching handler is called directly */
- } else {
- kfree_skb(skb);
- /* Jamal, now you will not able to escape explaining
- * me how you were going to use this. :-)
- */
- ret = NET_RX_DROP;
- }
- out: /* NOTE(review): no goto to this label is visible; presumably used by the omitted code - confirm */
- rcu_read_unlock();
- return ret;
- }
TCP/IP学习(30)——L2数据链路层的数据包处理详细流程 相关推荐
- 沁恒微电子CH9121 集成TCP/IP 协议栈,可实现网络数据包和串口数据的双向透明传输
概述 沁恒微电子CH9121 集成TCP/IP 协议栈,可实现网络数据包和串口数据的双向透明传输,具有TCP CLIENT、TCP SERVER、UDP 3 种工作模式,串口波特率最高可支持到92160 ...
- 【TCP/IP学习笔记1】 C语言讲解
TCP/IP学习笔记(一) 一. TCP/IP结构: TCP/IP是一个四层协议,结构如下: 1.应用层:各种应用程序和协议,如Http.FTP等. 2.传输层:TCP和 ...
- TCP/IP学习笔记(一)(转载)
一.TCP/IP结构: TCP/IP是一个四层协议,结构如下: 1.应用层:各种应用程序和协议,如Http.FTP等. 2.传输层:TCP和UDP TCP提供一 ...
- TCP/IP学习笔记:TCP/IP协议介绍
TCP/IP的通讯协议 这部分简要介绍一下TCP/IP的内部结构,为讨论与互联网有关的安全问题打下基础.TCP/IP协议组之所以流行,部分原因是因为它可以用在各种各样的信道和底层协议(例如T1和X.2 ...
- TCP / IP学习笔记(9)-dns域名系统
TCP / IP学习笔记(9)-dns域名系统 前面已经提到了访问一台机器要靠IP地址和MAC地址,其中,MAC地址可以通过ARP协议得到,所以这对用户是透明的,但是IP地址就不行,无论如何用户都需要 ...
- linux下用C语言实现TCP/IP服务器与客户端互相发送数据的socket编程
linux下用C语言实现TCP/IP服务器与客户端互相发送数据的socket编程 server.c #include <sys/stat.h>#include <fcntl.h> ...
- TCP/IP详解卷1 - wireshark抓包分析
TCP/IP详解卷1 - 系列文 TCP/IP详解卷1 - 思维导图(1) TCP/IP详解卷1 - wireshark抓包分析 引言 在初学TCP/IP协议时,会觉得协议是一种很抽象的东西,通过wi ...
- DPDK 数据包捕获基本流程(十六)
内核组件架构 rte_eal+libc:内存的统一组织管理者,但是在这它不只是做内存工作. librte_malloc:对外提供分配释放内存的API,分配的内存都是rte_eal中所管理的内存. li ...
- TCP/IP学习笔记(2)-数据链路层
数据链路层有三个目的: 为IP模块发送和接收IP数据报. 为ARP模块发送ARP请求和接收ARP应答. 为RARP发送RARP请求和接收RARP应答 ip大家都听说过.至于ARP和RARP,ARP叫做 ...
最新文章
- Unity GUI(uGUI)使用心得与性能总结
- 基于网络音频的Android播放程序简单示例
- nodejs即时聊天
- 2018年终总结—努力做一个有趣的人
- c# url编码 字母编码_我如何通过每天30分钟编码来完成#100DaysOfCode挑战
- 初探下一代SIEM核心技术发展趋势
- 高德百度坐标系转换方法
- h3c服务器显示非法的文件,H3C License server 故障处理手册-5W201
- 数据库基础:MySQL必备的三个工具
- klwp主题大全_klwp主题包百度网盘版下载-klwp主题包百度云版_5577安卓网
- 监控数据恢复取证-盘点进水监控硬盘的数据恢复
- 【408:计算机组成原理】起源:带你速看计算机伟大历史
- 51单片机实验 7段数码管静态显示数字
- 禾穗HERS | 不结婚就不孝?催婚季必备三招快学起来!
- 教你如何不显示excel中 N/A
- 理财公司天基实业如何投资理财收益最大化
- Linux内存机制浅见——从内存布局到线程局部存储TLS
- Python123.io---星号下三角形
- 盲盒商城系统创业是怎么一回事儿?
- 马斯克「萌生退意」:这推特我还干不干,你们说了算