dpdk实例flow_classify
文章目录
- 一.前言
- 二.源码
- 三.运行情况
- 四.改动ipv4_rules_file.txt
一.前言
Flow Classify示例应用程序基于转发应用程序的简单框架示例。
它旨在演示使用Flow Classify库API的DPDK转发应用程序的基本组件。
flow_classify例子对于DPDK的学习具有很重要的意义,是比较重要的章节。有点类似于linux网络中的iptables功能,也有点类似于我们在linux内核中开发的防火墙功能。我们可以使用flow模块对数据包进行统计,丢弃等基本的操作。
二.源码
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_flow.h>
#include <rte_flow_classify.h>
#include <rte_table_acl.h>

#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024

#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32

#define MAX_NUM_CLASSIFY 30
#define FLOW_CLASSIFY_MAX_RULE_NUM 91
#define FLOW_CLASSIFY_MAX_PRIORITY 8
#define FLOW_CLASSIFIER_NAME_SIZE 64

#define COMMENT_LEAD_CHAR ('#')
#define OPTION_RULE_IPV4 "rule_ipv4"
#define RTE_LOGTYPE_FLOW_CLASSIFY RTE_LOGTYPE_USER3
/* Log an error message under the FLOW_CLASSIFY log type. */
#define flow_classify_log(format, ...) \
	RTE_LOG(ERR, FLOW_CLASSIFY, format, ##__VA_ARGS__)

/*
 * Split the 32-bit value ip into four bytes, most significant first, and
 * store them through the pointers a, b, c and d.
 * Every argument is parenthesized so that expressions can be passed safely.
 */
#define uint32_t_to_char(ip, a, b, c, d) do {\
		*(a) = (unsigned char)((ip) >> 24 & 0xff);\
		*(b) = (unsigned char)((ip) >> 16 & 0xff);\
		*(c) = (unsigned char)((ip) >> 8 & 0xff);\
		*(d) = (unsigned char)((ip) & 0xff);\
	} while (0)

/*
 * Index of each whitespace-separated token of one rule line, i.e. the
 * subscript into the in[] array filled by parse_ipv4_5tuple_rule().
 */
enum {
	CB_FLD_SRC_ADDR,	/* 0 */
	CB_FLD_DST_ADDR,	/* 1 */
	CB_FLD_SRC_PORT,	/* 2 */
	CB_FLD_SRC_PORT_DLM,	/* 3 */
	CB_FLD_SRC_PORT_MASK,	/* 4 */
	CB_FLD_DST_PORT,	/* 5 */
	CB_FLD_DST_PORT_DLM,	/* 6 */
	CB_FLD_DST_PORT_MASK,	/* 7 */
	CB_FLD_PROTO,		/* 8 */
	CB_FLD_PRIORITY,	/* 9 */
	CB_FLD_NUM,		/* 10: number of tokens per rule line */
};
/*
 * One rule occupies one line of the rules file. After the line is split on
 * whitespace, the tokens land in in[] at the indices shown below (the
 * CB_FLD_* enumerators name these indices):
 *
 *   #src_ip/len dst_ip/len src_port : mask   dst_port : mask   proto/mask priority
 *   2.2.2.3/24  2.2.2.7/24 32       : 0xffff 33       : 0xffff 17/0xff    0
 *   0           1          2        3 4      5        6 7      8          9
 */
static struct {
	/* Path of the IPv4 rules file, taken from the --rule_ipv4 option. */
	const char *rule_ipv4_name;
} parm_config;

/* Token that must separate a port value from its mask in a rule line. */
const char cb_port_delim[] = ":";

/*
 * Default port configuration: the maximum RX packet length is set to
 * RTE_ETHER_MAX_LEN.
 */
static const struct rte_eth_conf port_conf_default = {
	.rxmode = {
		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
	},
};

/* Application-side wrapper around the library classifier handle. */
struct flow_classifier {
	struct rte_flow_classifier *cls;
};
/*
 * For reference, the layout of the library's struct rte_flow_classifier as
 * quoted by the original author (NOTE(review): library-internal, verify
 * against the DPDK version in use):
 *
 * struct rte_flow_classifier {
 *	// classifier parameters, passed in through the params struct
 *	// given to create()
 *	char name[RTE_FLOW_CLASSIFIER_MAX_NAME_SZ];
 *	int socket_id;
 *
 *	// remaining internal fields
 *	// n-tuple filter, i.e. the match items of a flow rule
 *	struct rte_eth_ntuple_filter ntuple_filter;
 *
 *	// tables
 *	struct rte_cls_table tables[RTE_FLOW_CLASSIFY_TABLE_MAX];
 *	uint32_t table_mask;
 *	uint32_t num_tables;
 *
 *	uint16_t nb_pkts;
 *	struct rte_flow_classify_table_entry
 *		*entries[RTE_PORT_IN_BURST_SIZE_MAX];
 * } __rte_cache_aligned;
 */

/* Cache-aligned container; main() sizes its allocation from this type. */
struct flow_classifier_acl {
	struct flow_classifier cls;
} __rte_cache_aligned;

/* ACL field definitions for IPv4 5 tuple rule */
enum {
	PROTO_FIELD_IPV4,
	SRC_FIELD_IPV4,
	DST_FIELD_IPV4,
	SRCP_FIELD_IPV4,
	DSTP_FIELD_IPV4,
	NUM_FIELDS_IPV4
};

/* Input (4-byte group) indices used by the ipv4_defs[] field definitions. */
enum {
	PROTO_INPUT_IPV4,
	SRC_INPUT_IPV4,
	DST_INPUT_IPV4,
	SRCP_DESTP_INPUT_IPV4
};
/* 数据结构 rte_acl_field_def:ACL 访问控制表的字段的定义
ACL规则中的每个字段都有一个关联定义。有五个,分别是:
字段的类型 type:
RTE_ACL_FIELD_TYPE_BITMASK:单字节区域如ip头部一个字节的proto字段;
RTE_ACL_FIELD_TYPE_MASK:采用MASK方式描述,一般对应4字节的源/目的地址;
RTE_ACL_FIELD_TYPE_RANGE:一般对应TCP或UDP头部2字节的PORT区域。
字段的字节数大小 size,
字段的索引(指示哪一个字段)field_index 一个0开始的值,用来指定字段在规则内部的位置,0~n-1表示n个字段。
输入索引 input_index(0-N) 所有输入字段,除了第一个,其他必须以4个连续字节分组,这个input_index就是来指定字段在那个组
偏移量offset 定义了字段的偏移量,为查找指定从缓冲区的起始位置的偏移。
*//*
rule “规则” 有一些独有规则:1. 规则定义的第一个字段必须是一个字节的长度2. 之后的字段必须以4个连续的字节分组这主要是为性能考虑,查找函数处理第一个输入字节做为这个流的设置的一部分,然后这查找函数的内部循环被展开来同时处理4字节的输入。
*/static struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {// 共 5 个字段,每个字段都要有一个关联的五个定义/* first input field - always one byte long. */{.type = RTE_ACL_FIELD_TYPE_BITMASK,.size = sizeof(uint8_t),//1个字节.field_index = PROTO_FIELD_IPV4,.input_index = PROTO_INPUT_IPV4,.offset = sizeof(struct rte_ether_hdr) +offsetof(struct rte_ipv4_hdr, next_proto_id),},/* next input field (IPv4 source address) - 4 consecutive bytes. */{/* rte_flow uses a bit mask for IPv4 addresses */// 第二个字段 源IP地址.type = RTE_ACL_FIELD_TYPE_BITMASK,.size = sizeof(uint32_t),.field_index = SRC_FIELD_IPV4,.input_index = SRC_INPUT_IPV4,.offset = sizeof(struct rte_ether_hdr) +offsetof(struct rte_ipv4_hdr, src_addr),},/* next input field (IPv4 destination address) - 4 consecutive bytes. */{/* rte_flow uses a bit mask for IPv4 addresses */// 第三个字段 目的IP地址.type = RTE_ACL_FIELD_TYPE_BITMASK,.size = sizeof(uint32_t),.field_index = DST_FIELD_IPV4,.input_index = DST_INPUT_IPV4,.offset = sizeof(struct rte_ether_hdr) +offsetof(struct rte_ipv4_hdr, dst_addr),},/** Next 2 fields (src & dst ports) form 4 consecutive bytes.* They share the same input index.*/{/* rte_flow uses a bit mask for protocol ports */// 接下来的 两个端口号 才组成一个 4 字节,所以共享同样的一个 input index.type = RTE_ACL_FIELD_TYPE_BITMASK,.size = sizeof(uint16_t),.field_index = SRCP_FIELD_IPV4,.input_index = SRCP_DESTP_INPUT_IPV4,.offset = sizeof(struct rte_ether_hdr) +sizeof(struct rte_ipv4_hdr) +offsetof(struct rte_tcp_hdr, src_port),},{/* rte_flow uses a bit mask for protocol ports */// 第三个字段 传输层协议.type = RTE_ACL_FIELD_TYPE_BITMASK,.size = sizeof(uint16_t),.field_index = DSTP_FIELD_IPV4,.input_index = SRCP_DESTP_INPUT_IPV4,.offset = sizeof(struct rte_ether_hdr) +sizeof(struct rte_ipv4_hdr) +offsetof(struct rte_tcp_hdr, dst_port),},
};/* flow classify data */
static int num_classify_rules;// rules数组的下标
static struct rte_flow_classify_rule *rules[MAX_NUM_CLASSIFY];// rules 数组
static struct rte_flow_classify_ipv4_5tuple_stats ntuple_stats;
static struct rte_flow_classify_stats classify_stats = {.stats = (void **)&ntuple_stats// 有计数功能
};/* parameters for rte_flow_classify_validate and* rte_flow_classify_table_entry_add functions*/
/* rte_flow_item 四个字段:
1. type,是 enum 定义。见 rte_flow.h:http://doc.dpdk.org/api/rte__flow_8h_source.html
2. spec,指向相关项类型结构的有效指针,在许多情况下,可以设置成 NULL以请求广泛(非特定)匹配。在此情况下,last 和 mask 也要设置成 NULL
3. last,可以指向相同类型的结构,以定义包含范围。
4. Mask,是在解释spec和last的内容之前应用的简单位掩码
*/
static struct rte_flow_item eth_item = { RTE_FLOW_ITEM_TYPE_ETH,0, 0, 0 };
static struct rte_flow_item end_item = { RTE_FLOW_ITEM_TYPE_END,0, 0, 0 };/* sample actions:* "actions count / end"*/
struct rte_flow_query_count count = {// 计数器查询的结构体.reset = 1,// Reset counters after query.hits_set = 1,// 启用 hits 字段.bytes_set = 1,// 启用 bytes字段.hits = 0,// Number of hits for this rule.bytes = 0,
};
static struct rte_flow_action count_action = { RTE_FLOW_ACTION_TYPE_COUNT,&count};
static struct rte_flow_action end_action = { RTE_FLOW_ACTION_TYPE_END, 0};
static struct rte_flow_action actions[2];// rte_flow_action 见 programmers’ guides 的第九章 :http://doc.dpdk.org/guides/prog_guide/rte_flow.html
// actions 数组代表当 pkt 被 pattern 匹配时要执行的一系列操作。
// 在这个例子里,数组长度为二,actions[0] 就是计数,actions[1] 就是用来提示结尾。// rte_flow_action的具体定义不清楚
// 估计第一个字段是 enum rte_flow_action_type ,具体的 enum 定义见:http://doc.dpdk.org/api/rte__flow_8h.html#a78f0386e683cfc491462a771df8b971a
// 第二个字段计数器查询的结构体
/* sample attributes */
static struct rte_flow_attr attr;/* flow_classify.c: * Based on DPDK skeleton forwarding example. *//** Initializes a given port using global settings and with the RX buffers* coming from the mbuf_pool passed as a parameter.*///端口初始化
//1.获取可用 eth 的个数
//2.配置网卡设备
//3.每个 port 1 个 rx 队列
//4.每个 port 1 个 tx 队列
//5.启用网卡设备
//6.设置网卡混杂模式
static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool)
{struct rte_eth_conf port_conf = port_conf_default;struct rte_ether_addr addr;const uint16_t rx_rings = 1, tx_rings = 1;int retval;uint16_t q;struct rte_eth_dev_info dev_info;struct rte_eth_txconf txconf;if (!rte_eth_dev_is_valid_port(port))return -1;retval = rte_eth_dev_info_get(port, &dev_info);if (retval != 0) {printf("Error during getting device (port %u) info: %s\n",port, strerror(-retval));return retval;}if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;/* Configure the Ethernet device. */retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);if (retval != 0)return retval;/* Allocate and set up 1 RX queue per Ethernet port. */for (q = 0; q < rx_rings; q++) {retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,rte_eth_dev_socket_id(port), NULL, mbuf_pool);if (retval < 0)return retval;}txconf = dev_info.default_txconf;txconf.offloads = port_conf.txmode.offloads;/* Allocate and set up 1 TX queue per Ethernet port. */for (q = 0; q < tx_rings; q++) {retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,rte_eth_dev_socket_id(port), &txconf);if (retval < 0)return retval;}/* Start the Ethernet port. */retval = rte_eth_dev_start(port);if (retval < 0)return retval;/* Display the port MAC address. */retval = rte_eth_macaddr_get(port, &addr);if (retval != 0)return retval;printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8" %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",port,addr.addr_bytes[0], addr.addr_bytes[1],addr.addr_bytes[2], addr.addr_bytes[3],addr.addr_bytes[4], addr.addr_bytes[5]);/* Enable RX in promiscuous mode for the Ethernet device. */retval = rte_eth_promiscuous_enable(port);if (retval != 0)return retval;return 0;
}/** The lcore main. This is the main thread that does the work, reading from* an input port classifying the packets and writing to an output port.*/
//这是完成工作的主线程,它从输入端口读取并写入输出端口。
static __rte_noreturn void
lcore_main(struct flow_classifier *cls_app)
{uint16_t port;int ret;int i = 0;//测试:删除一条规则ret = rte_flow_classify_table_entry_delete(cls_app->cls,rules[7]);if (ret)printf("table_entry_delete failed [7] %d\n\n", ret);elseprintf("table_entry_delete succeeded [7]\n\n");/** Check that the port is on the same NUMA node as the polling thread* for best performance.*///检查端口是否与轮询线程在同一NUMA节点上,以获得最佳性能。RTE_ETH_FOREACH_DEV(port)//遍历每个端口if (rte_eth_dev_socket_id(port) > 0 &&rte_eth_dev_socket_id(port) != (int)rte_socket_id()) {printf("\n\n");printf("WARNING: port %u is on remote NUMA node\n",port);printf("to polling thread.\n");printf("Performance will not be optimal.\n");}printf("\nCore %u forwarding packets. ", rte_lcore_id());printf("[Ctrl+C to quit]\n");/* Run until the application is quit or killed. */for (;;) {/** Receive packets on a port, classify them and forward them* on the paired port.* The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.*/RTE_ETH_FOREACH_DEV(port) {/* Get burst of RX packets, from first port of pair. */struct rte_mbuf *bufs[BURST_SIZE];const uint16_t nb_rx = rte_eth_rx_burst(port, 0,bufs, BURST_SIZE);if (unlikely(nb_rx == 0))continue;for (i = 0; i < MAX_NUM_CLASSIFY; i++) {if (rules[i]) {// 对classifier里的每条规则(用一个数组来保存插入成功时返回的rule指针)/* rte_flow_classifier_query(),查看burst中是否有任何数据包与表中的一条流规则匹配。参数:流分类器句柄、要处理的数据包的mbuf一个burst的数据包数量、要查询的规则、查询的stat */ret = rte_flow_classifier_query(cls_app->cls,bufs, nb_rx, rules[i],&classify_stats);if (ret)printf("rule [%d] query failed ret [%d]\n\n",i, ret);else {// 返回 0 代表有matchprintf("rule[%d] count=%"PRIu64"\n",i, ntuple_stats.counter1);printf("proto = %d\n",ntuple_stats.ipv4_5tuple.proto);}}}/* Send burst of TX packets, to second port of pair. */const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,bufs, nb_rx);// port 异或 1 --> 0就和1是一对,2就和3是一对。// 0 收到包就从 1 转发, 3 收到包 就从 2 口转发。//对于发不出去的包就把内存释放掉,也就是drop这些包//对于DPDK的收包和转发来说,都是一次处理多个数据包//原因是cache行的内存对齐可以一次处理多个地址//并且可以充分利用处理器内部的乱序执行和并行处理能力/* Free any unsent packets. 
*/if (unlikely(nb_tx < nb_rx)) {uint16_t buf;for (buf = nb_tx; buf < nb_rx; buf++)rte_pktmbuf_free(bufs[buf]);}}}
}/** Parse IPv4 5 tuple rules file, ipv4_rules_file.txt.* Expected format:* <src_ipv4_addr>'/'<masklen> <space> \* <dst_ipv4_addr>'/'<masklen> <space> \* <src_port> <space> ":" <src_port_mask> <space> \* <dst_port> <space> ":" <dst_port_mask> <space> \* <proto>'/'<proto_mask> <space> \* <priority>*/
static int
get_cb_field(char **in, uint32_t *fd, int base, unsigned long lim,char dlm)
{unsigned long val;char *end;errno = 0;val = strtoul(*in, &end, base);/* unsigned long int strtoul(const char *str, char **endptr, int base) 把参数 str 所指向的字符串根据给定的 base 转换为一个无符号长整数(unsigned long int 型)。str -- 要转换为无符号长整数的字符串。endptr -- 对类型为 char* 的对象的引用,其值会由函数设置为 str 中数值后的下一个字符。(end 会指向点分十进制中的下一个点)base -- 基数,必须介于 2 和 36(包含)之间,或者是特殊值 0。当base = 0,自动判断字符串的类型,并按10进制输出,例如"0xa", 就会把字符串当做16进制处理,输出为 10。参考:http://www.runoob.com/cprogramming/c-function-strtoul.htmlhttps://blog.csdn.net/chuhongcai/article/details/52032926*/if (errno != 0 || end[0] != dlm || val > lim)return -EINVAL;*fd = (uint32_t)val;*in = end + 1;例如 2.2.2.3 会依次转换 2 2 2 3return 0;
}static int
parse_ipv4_net(char *in, uint32_t *addr, uint32_t *mask_len)
{uint32_t a, b, c, d, m;
// 这四个if是判断IP地址的每个点分十进制是否小于255(UINT8_MAX)if (get_cb_field(&in, &a, 0, UINT8_MAX, '.'))return -EINVAL;if (get_cb_field(&in, &b, 0, UINT8_MAX, '.'))return -EINVAL;if (get_cb_field(&in, &c, 0, UINT8_MAX, '.'))return -EINVAL;if (get_cb_field(&in, &d, 0, UINT8_MAX, '/'))return -EINVAL;// 后缀要小于32if (get_cb_field(&in, &m, 0, sizeof(uint32_t) * CHAR_BIT, 0))return -EINVAL;addr[0] = RTE_IPV4(a, b, c, d);mask_len[0] = m;return 0;
}static int
parse_ipv4_5tuple_rule(char *str, struct rte_eth_ntuple_filter *ntuple_filter)
// 将 txt 中一行输入,转换成一个 rte_eth_ntuple_filter 结构体。
{int i, ret;char *s, *sp, *in[CB_FLD_NUM];static const char *dlm = " \t\n";int dim = CB_FLD_NUM;//10uint32_t temp;s = str;for (i = 0; i != dim; i++, s = NULL) {in[i] = strtok_r(s, dlm, &sp);// linux下的字符串切割函数:strtok_r/* char *strtok_r(char *str, const char *delim, char **saveptr);在str中,返回由delim指定的分界符分开str的单词。参考链接:https://blog.csdn.net/hustfoxy/article/details/23473805*/if (in[i] == NULL)return -EINVAL;}
/* 一条 rule 占一行,格式,以及分词后的在in数组内的下标如下:#源IP/前缀 目的IP/前缀 源端口号 : 掩码 目的端口号 : 掩码 协议/掩码 优先级2.2.2.3/24 2.2.2.7/24 32 : 0xffff 33 : 0xffff 17/0xff 00 1 2 3 4 5 6 7 8 9 ← in数组下标 */ret = parse_ipv4_net(in[CB_FLD_SRC_ADDR],// 解析 src_ip 得到IP地址和掩码,放到 ntuple_filter的对应字段里&ntuple_filter->src_ip,&ntuple_filter->src_ip_mask);if (ret != 0) {flow_classify_log("failed to read source address/mask: %s\n",in[CB_FLD_SRC_ADDR]);return ret;}ret = parse_ipv4_net(in[CB_FLD_DST_ADDR],// 解析 dst_ip 得到IP地址和掩码,放到 ntuple_filter的对应字段里&ntuple_filter->dst_ip,&ntuple_filter->dst_ip_mask);if (ret != 0) {flow_classify_log("failed to read source address/mask: %s\n",in[CB_FLD_DST_ADDR]);return ret;}if (get_cb_field(&in[CB_FLD_SRC_PORT], &temp, 0, UINT16_MAX, 0))// 源端口号字符串转 unsigned long ,验证不能大于16位无符号数的最大值。return -EINVAL;ntuple_filter->src_port = (uint16_t)temp;
// 放到 ntuple_filter的对应字段里if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim,sizeof(cb_port_delim)) != 0)// 检查分隔符是否为: 不然是格式错误。return -EINVAL;if (get_cb_field(&in[CB_FLD_SRC_PORT_MASK], &temp, 0, UINT16_MAX, 0))// 源端口号掩码return -EINVAL;ntuple_filter->src_port_mask = (uint16_t)temp;if (get_cb_field(&in[CB_FLD_DST_PORT], &temp, 0, UINT16_MAX, 0)) // 目的端口号return -EINVAL;ntuple_filter->dst_port = (uint16_t)temp;if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim,sizeof(cb_port_delim)) != 0)// 检查分隔符是否为: 不然是格式错误。return -EINVAL;if (get_cb_field(&in[CB_FLD_DST_PORT_MASK], &temp, 0, UINT16_MAX, 0))// 目的端口号掩码return -EINVAL;ntuple_filter->dst_port_mask = (uint16_t)temp;if (get_cb_field(&in[CB_FLD_PROTO], &temp, 0, UINT8_MAX, '/'))//协议号return -EINVAL;ntuple_filter->proto = (uint8_t)temp;if (get_cb_field(&in[CB_FLD_PROTO], &temp, 0, UINT8_MAX, 0))return -EINVAL;//协议号掩码ntuple_filter->proto_mask = (uint8_t)temp;if (get_cb_field(&in[CB_FLD_PRIORITY], &temp, 0, UINT16_MAX, 0))//优先级return -EINVAL;ntuple_filter->priority = (uint16_t)temp;if (ntuple_filter->priority > FLOW_CLASSIFY_MAX_PRIORITY)ret = -EINVAL;return ret;
}/* Bypass comment and empty lines */
/*
 * Tell whether a line of the rules file should be skipped.
 *
 * Returns 1 when buff starts with COMMENT_LEAD_CHAR or consists only of
 * whitespace (including the empty string), 0 otherwise.
 */
static inline int
is_bypass_line(char *buff)
{
	int i = 0;

	/* comment line */
	if (buff[0] == COMMENT_LEAD_CHAR)
		return 1;
	/* empty line */
	while (buff[i] != '\0') {
		/*
		 * <ctype.h> functions need a value representable as unsigned
		 * char; a negative plain char (non-ASCII byte) would be
		 * undefined behaviour without the cast.
		 */
		if (!isspace((unsigned char)buff[i]))
			return 0;
		i++;
	}
	return 1;
}
/*
 * Convert a prefix length into a 32-bit mask with that many leading
 * (most significant) bits set, e.g. 24 -> 0xffffff00.
 *
 * depth_val: prefix length; values of 32 or more yield 0xffffffff.
 *
 * The mask is computed with unsigned arithmetic only: shifting a signed 1
 * into bit 31, or shifting by a negative or >= 32 count, is undefined.
 */
static uint32_t
convert_depth_to_bitmask(uint32_t depth_val)
{
	if (depth_val == 0)
		return 0;
	if (depth_val >= 32)
		return UINT32_MAX;

	return (uint32_t)~(UINT32_MAX >> depth_val);
}

/*
 * The function below wraps rte_flow_classify_table_entry_add(): it sets up
 * the parameters by converting a struct rte_eth_ntuple_filter into flow
 * items.
 */
static int
add_classify_rule(struct rte_eth_ntuple_filter *ntuple_filter,struct flow_classifier *cls_app)
{int ret = -1;int key_found;struct rte_flow_error error;struct rte_flow_item_ipv4 ipv4_spec;struct rte_flow_item_ipv4 ipv4_mask;struct rte_flow_item ipv4_udp_item;struct rte_flow_item ipv4_tcp_item;struct rte_flow_item ipv4_sctp_item;struct rte_flow_item_udp udp_spec;struct rte_flow_item_udp udp_mask;struct rte_flow_item udp_item;struct rte_flow_item_tcp tcp_spec;struct rte_flow_item_tcp tcp_mask;struct rte_flow_item tcp_item;struct rte_flow_item_sctp sctp_spec;struct rte_flow_item_sctp sctp_mask;struct rte_flow_item sctp_item;struct rte_flow_item pattern_ipv4_5tuple[4];struct rte_flow_classify_rule *rule;uint8_t ipv4_proto;if (num_classify_rules >= MAX_NUM_CLASSIFY) {printf("\nINFO: classify rule capacity %d reached\n",num_classify_rules);return ret;}/* set up parameters for validate and add */// 把这三个参数从ntuple_filter结构体提取到 rte_flow_item_ipv4 的一个专门的结构体:ipv4_specmemset(&ipv4_spec, 0, sizeof(ipv4_spec));ipv4_spec.hdr.next_proto_id = ntuple_filter->proto;ipv4_spec.hdr.src_addr = ntuple_filter->src_ip;ipv4_spec.hdr.dst_addr = ntuple_filter->dst_ip;ipv4_proto = ipv4_spec.hdr.next_proto_id;// 把这三个参数从ntuple_filter结构体提取到 rte_flow_item_ipv4 的一个专门的结构体 :ipv4_maskmemset(&ipv4_mask, 0, sizeof(ipv4_mask));ipv4_mask.hdr.next_proto_id = ntuple_filter->proto_mask;ipv4_mask.hdr.src_addr = ntuple_filter->src_ip_mask;ipv4_mask.hdr.src_addr =convert_depth_to_bitmask(ipv4_mask.hdr.src_addr);ipv4_mask.hdr.dst_addr = ntuple_filter->dst_ip_mask;ipv4_mask.hdr.dst_addr =convert_depth_to_bitmask(ipv4_mask.hdr.dst_addr);switch (ipv4_proto) {case IPPROTO_UDP:ipv4_udp_item.type = RTE_FLOW_ITEM_TYPE_IPV4;ipv4_udp_item.spec = &ipv4_spec;ipv4_udp_item.mask = &ipv4_mask;ipv4_udp_item.last = NULL;udp_spec.hdr.src_port = ntuple_filter->src_port;udp_spec.hdr.dst_port = ntuple_filter->dst_port;udp_spec.hdr.dgram_len = 0;udp_spec.hdr.dgram_cksum = 0;udp_mask.hdr.src_port = ntuple_filter->src_port_mask;udp_mask.hdr.dst_port = ntuple_filter->dst_port_mask;udp_mask.hdr.dgram_len = 0;udp_mask.hdr.dgram_cksum 
= 0;udp_item.type = RTE_FLOW_ITEM_TYPE_UDP;udp_item.spec = &udp_spec;udp_item.mask = &udp_mask;udp_item.last = NULL;attr.priority = ntuple_filter->priority;pattern_ipv4_5tuple[1] = ipv4_udp_item;pattern_ipv4_5tuple[2] = udp_item;break;case IPPROTO_TCP:ipv4_tcp_item.type = RTE_FLOW_ITEM_TYPE_IPV4;ipv4_tcp_item.spec = &ipv4_spec;ipv4_tcp_item.mask = &ipv4_mask;ipv4_tcp_item.last = NULL;memset(&tcp_spec, 0, sizeof(tcp_spec));tcp_spec.hdr.src_port = ntuple_filter->src_port;tcp_spec.hdr.dst_port = ntuple_filter->dst_port;memset(&tcp_mask, 0, sizeof(tcp_mask));tcp_mask.hdr.src_port = ntuple_filter->src_port_mask;tcp_mask.hdr.dst_port = ntuple_filter->dst_port_mask;tcp_item.type = RTE_FLOW_ITEM_TYPE_TCP;tcp_item.spec = &tcp_spec;tcp_item.mask = &tcp_mask;tcp_item.last = NULL;attr.priority = ntuple_filter->priority;pattern_ipv4_5tuple[1] = ipv4_tcp_item;pattern_ipv4_5tuple[2] = tcp_item;break;case IPPROTO_SCTP:ipv4_sctp_item.type = RTE_FLOW_ITEM_TYPE_IPV4;ipv4_sctp_item.spec = &ipv4_spec;ipv4_sctp_item.mask = &ipv4_mask;ipv4_sctp_item.last = NULL;sctp_spec.hdr.src_port = ntuple_filter->src_port;sctp_spec.hdr.dst_port = ntuple_filter->dst_port;sctp_spec.hdr.cksum = 0;sctp_spec.hdr.tag = 0;sctp_mask.hdr.src_port = ntuple_filter->src_port_mask;sctp_mask.hdr.dst_port = ntuple_filter->dst_port_mask;sctp_mask.hdr.cksum = 0;sctp_mask.hdr.tag = 0;sctp_item.type = RTE_FLOW_ITEM_TYPE_SCTP;sctp_item.spec = &sctp_spec;sctp_item.mask = &sctp_mask;sctp_item.last = NULL;attr.priority = ntuple_filter->priority;pattern_ipv4_5tuple[1] = ipv4_sctp_item;pattern_ipv4_5tuple[2] = sctp_item;break;default:return ret;}attr.ingress = 1;// rules 适用于入口流量pattern_ipv4_5tuple[0] = eth_item;// L2 item,放在pattern_ipv4_5tuple[0],一定是eth_item// L3 item 放在数组下标1,L4 item放在数组下标2pattern_ipv4_5tuple[3] = end_item;// 最后一个 item 一定要用 end_item 结尾。actions[0] = count_action;// 流匹配的动作是 计数actions[1] = end_action;/* Validate and add rule *//* rte_flow_classify_validate验证这条规则的有效性参数:1. classifer 指针2. attr 指针,流规则的属性,详细内容见上。3. 
rte_flow_item 结构体数组(terminated by the END pattern item),也就是 ACL 规则的详细内容4. rte_flow_action 结构体数组(terminated by the END pattern item),表示流规则的动作,比如QUEUE, DROP, END等等,5. struct rte_flow_error,出错时存放信息。*/ret = rte_flow_classify_validate(cls_app->cls, &attr,pattern_ipv4_5tuple, actions, &error);if (ret) {printf("table entry validate failed ipv4_proto = %u\n",ipv4_proto);return ret;}// 调用 rte_flow_classify_table_entry_add() 将规则添加到 rte_flow_classifier 对象中的 table。/* 五个参数1. classifier 的指针。2. attr 指针。3. rte_flow_item 结构体数组,也就是 ACL 规则的详细内容。4. rte_flow_action 结构体数组,表示流规则的动作。5. 一个int指针,如果规则已经存在则返回1,否则返回0。6. 仅出错时存放信息。*/rule = rte_flow_classify_table_entry_add(cls_app->cls, &attr, pattern_ipv4_5tuple,actions, &key_found, &error);if (rule == NULL) { // 添加成功时返回的是rule的有效句柄,否则为NULLprintf("table entry add failed ipv4_proto = %u\n",ipv4_proto);ret = -1;return ret;}rules[num_classify_rules] = rule;// 将rule存放在一个数组里,方便删除等操作num_classify_rules++;return 0;
}// 封装一层,主要是文件操作,把txt中的一行解析成 rte_eth_ntuple_filter 结构体
static int
add_rules(const char *rule_path, struct flow_classifier *cls_app)
{FILE *fh;char buff[LINE_MAX];unsigned int i = 0;unsigned int total_num = 0;struct rte_eth_ntuple_filter ntuple_filter;int ret;fh = fopen(rule_path, "rb");if (fh == NULL)rte_exit(EXIT_FAILURE, "%s: fopen %s failed\n", __func__,rule_path);ret = fseek(fh, 0, SEEK_SET);// 设置文件指针fh的位置指向文件开头if (ret)rte_exit(EXIT_FAILURE, "%s: fseek %d failed\n", __func__,ret);i = 0;while (fgets(buff, LINE_MAX, fh) != NULL) {// 读取一行内容i++;if (is_bypass_line(buff))// 跳过空行 or 以井号开头的注释continue;if (total_num >= FLOW_CLASSIFY_MAX_RULE_NUM - 1) {// 有最大规则数量(行数)限制printf("\nINFO: classify rule capacity %d reached\n",total_num);break;}if (parse_ipv4_5tuple_rule(buff, &ntuple_filter) != 0)rte_exit(EXIT_FAILURE,"%s Line %u: parse rules error\n",rule_path, i);// 规则的 parser 解析txt的一行输入,存放到ntuple_filter结构体里if (add_classify_rule(&ntuple_filter, cls_app) != 0)// 添加这条五元组规则到 ACL 中rte_exit(EXIT_FAILURE, "add rule error\n");total_num++;}fclose(fh);return 0;
}/* display usage */
/* Print the application-specific command-line help for prgname. */
static void
print_usage(const char *prgname)
{
	printf("%s usage:\n"
		"[EAL options] -- --"OPTION_RULE_IPV4"=FILE: "
		"specify the ipv4 rules file.\n"
		"Each rule occupies one line in the file.\n",
		prgname);
}

/* Parse the argument given in the command line of the application */
/*
 * Parse the application's own command-line options (those after the EAL
 * ones). The only option is --rule_ipv4=FILE, whose argument is stored in
 * parm_config.rule_ipv4_name.
 *
 * Returns optind - 1 on success, or -1 after printing the usage text when
 * an unknown option is found. optind is reset to 1 before returning.
 */
static int
parse_args(int argc, char **argv)
{
	static struct option long_opts[] = {
		{OPTION_RULE_IPV4, 1, 0, 0},
		{NULL, 0, 0, 0}
	};
	char *prog = argv[0];
	char **args = argv;
	int opt_idx;
	int consumed;
	int c;

	for (;;) {
		c = getopt_long(argc, args, "", long_opts, &opt_idx);
		if (c == EOF)
			break;

		/* Anything other than a long option is a usage error. */
		if (c != 0) {
			print_usage(prog);
			return -1;
		}

		/* long options */
		if (strncmp(long_opts[opt_idx].name, OPTION_RULE_IPV4,
				sizeof(OPTION_RULE_IPV4)) == 0)
			parm_config.rule_ipv4_name = optarg;
	}

	if (optind >= 0)
		argv[optind-1] = prog;

	consumed = optind-1;
	optind = 1; /* reset getopt lib */
	return consumed;
}

/*
 * The main function, which does initialization and calls the lcore_main
 * function.
 */
int
main(int argc, char *argv[])
{struct rte_mempool *mbuf_pool;uint16_t nb_ports;uint16_t portid;int ret;int socket_id;struct rte_table_acl_params table_acl_params;struct rte_flow_classify_table_params cls_table_params;struct flow_classifier *cls_app;//分流器struct rte_flow_classifier_params cls_params;uint32_t size;/* Initialize the Environment Abstraction Layer (EAL). */ret = rte_eal_init(argc, argv);if (ret < 0)rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");argc -= ret;argv += ret;/* parse application arguments (after the EAL ones) */ret = parse_args(argc, argv);if (ret < 0)rte_exit(EXIT_FAILURE, "Invalid flow_classify parameters\n");/* Check that there is an even number of ports to send/receive on. */nb_ports = rte_eth_dev_count_avail();if (nb_ports < 2 || (nb_ports & 1))rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");/* Creates a new mempool in memory to hold the mbufs. */mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());if (mbuf_pool == NULL)rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");/* Initialize all ports. */RTE_ETH_FOREACH_DEV(portid)if (port_init(portid, mbuf_pool) != 0)rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n",portid);if (rte_lcore_count() > 1)printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");socket_id = rte_eth_dev_socket_id(0);// 返回 0 号网口所在的NUMA socket id号/* Memory allocation */// 为分流器 cls_app 分配内存size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct flow_classifier_acl));// 返回大于或等于宏定义参数的第一个缓存对齐值cls_app = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);// DPDK的malloc:从调用该函数的核上的同一个NUMA socket的大页面区域分配堆内存。// zmalloc 就是清零 与 calloc 相似/* rte_zmalloc 参数三个:1. 指示这块区域分配给怎样的object类型。用于debug用途。可以写NULL2. size (in bytes) to be allocated,这里分配一个cache缓存行的字节。3. 
alignif 0, 会返回一个适合任何类型变量的指针,就像 malloc否则,返回一个内存区域是 align 的对齐倍数,显然最小对齐是高速缓存行大小,宏:RTE_CACHE_LINE_SIZE*/if (cls_app == NULL)rte_exit(EXIT_FAILURE, "Cannot allocate classifier memory\n");// classifier 的参数 有两个: name 和 socket id// 需要在调用 create() API 之前由应用程序初始化cls_params.name = "flow_classifier";cls_params.socket_id = socket_id;// 调用 rte_flow_classifier_create() 函数来创建rte_flow_classifier对象。// 参数是 rte_flow_classifier_params 结构体指针cls_app->cls = rte_flow_classifier_create(&cls_params);if (cls_app->cls == NULL) {rte_free(cls_app);rte_exit(EXIT_FAILURE, "Cannot create classifier\n");}/* initialise ACL table params */// 填写 ACL 的初始化参数// 四个字段:table_acl_params.name = "table_acl_ipv4_5tuple";table_acl_params.n_rules = FLOW_CLASSIFY_MAX_RULE_NUM;table_acl_params.n_rule_fields = RTE_DIM(ipv4_defs);memcpy(table_acl_params.field_format, ipv4_defs, sizeof(ipv4_defs));// ACL rule 的详细内容 specification// ACL 规则的字段也必须由应用程序初始化。/* initialise table create params */cls_table_params.ops = &rte_table_acl_ops;//表操作(特定于每个表类型)cls_table_params.arg_create = &table_acl_params;// 传递给表的用于创建的参数 这里是ACL的初始化参数结构体的指针cls_table_params.type = RTE_FLOW_CLASSIFY_TABLE_ACL_IP4_5TUPLE;// rte_flow_classify_table_create() 向classifier对象添加一个表。// 参数两个:1. 流分类器的指针 2. 表创建的参数ret = rte_flow_classify_table_create(cls_app->cls, &cls_table_params);if (ret) {rte_flow_classifier_free(cls_app->cls);rte_free(cls_app);rte_exit(EXIT_FAILURE, "Failed to create classifier table\n");}/* read file of IPv4 5 tuple rules and initialize parameters* for rte_flow_classify_validate and rte_flow_classify_table_entry_add* API's.*// 然后它读取ipv4_rules_file.txt文件,验证流规则是否合法,然后初始化rte_flow_classify_table_entry_add() API 的参数,使用此API将规则添加到ACL表。if (add_rules(parm_config.rule_ipv4_name, cls_app)) {rte_flow_classifier_free(cls_app->cls);rte_free(cls_app);rte_exit(EXIT_FAILURE, "Failed to add rules\n");}/* Call lcore_main on the main core only. */lcore_main(cls_app);return 0;
}
flow_classify 这个程序做的事情分为如下几步骤:
EAL初始化、端口初始化、分配内存等,与 basicfwd(skeleton 示例)是一样的。
创建 flow_classifier 对象。这一个过程在代码中体现为好几个阶段:为 classifier 分配内存、创建 classifier 对象、填写 ACL 的初始化参数、填写 table 的初始化参数并创建 table。
读取 ipv4_rules_file.txt 这个文件,文件中一行是一个规则,一行的内容是一个ipv4的五元组。如果符合输入的合法性验证要求,就把里面的内容提取成特定的数据结构,插入到 classifier 里。2、3两步过程中封装了多层,还涉及非常多的数据结构和API。不容易搞懂。(其实也不需要完全搞懂,我后面有说,继续往下看)
添加完规则后进入lcore_main主线程,死循环收包(参照 basicfwd)。每次收上来的一批包,都对 classifier 里的每条规则进行 query,用到DPDK的API。如果其中有符合规则的packet(也就是query rule 匹配),就会在对应 rule 的 counter 加 1 并显示 counter 的数字(匹配成功次数),失败的话就显示“没有匹配到这条规则”的提示语句。然后不论匹配是否成功,都把这批包从另一个端口转发出去。
三.运行情况
ipv4_rules_file.txt内容:
#src_ip/masklen dst_ip/masklen src_port : mask dst_port : mask proto/mask priority
#
2.2.2.3/24 2.2.2.7/24 32 : 0xffff 33 : 0xffff 17/0xff 0
9.9.9.3/24 9.9.9.7/24 32 : 0xffff 33 : 0xffff 17/0xff 1
9.9.9.3/24 9.9.9.7/24 32 : 0xffff 33 : 0xffff 6/0xff 2
9.9.8.3/24 9.9.8.7/24 32 : 0xffff 33 : 0xffff 6/0xff 3
6.7.8.9/24 2.3.4.5/24 32 : 0x0000 33 : 0x0000 132/0xff 4
6.7.8.9/32 192.168.0.36/32 10 : 0xffff 11 : 0xffff 6/0xfe 5
6.7.8.9/24 192.168.0.36/24 10 : 0xffff 11 : 0xffff 6/0xfe 6
6.7.8.9/16 192.168.0.36/16 10 : 0xffff 11 : 0xffff 6/0xfe 7
6.7.8.9/8 192.168.0.36/8 10 : 0xffff 11 : 0xffff 6/0xfe 8
./flow_classify -c 1 -n 4 -- --rule_ipv4="../ipv4_rules_file.txt"
Core 0 forwarding packets. [Ctrl+C to quit]
rule [0] query failed ret [-22]
rule [1] query failed ret [-22]
rule [2] query failed ret [-22]
rule [3] query failed ret [-22]
rule [4] query failed ret [-22]
rule [5] query failed ret [-22]
rule [6] query failed ret [-22]
rule [7] query failed ret [-22]
rule [8] query failed ret [-22]
没有改动原来自带的规则文件,因此不会有匹配成功,提示的都是匹配失败。
四.改动ipv4_rules_file.txt
目前没成功,待改进!
参考:https://blog.csdn.net/weixin_30693683/article/details/98337353
dpdk实例flow_classify相关推荐
- DPDK flow_classify 源码阅读
代码部分 /* SPDX-License-Identifier: BSD-3-Clause* Copyright(c) 2017 Intel Corporation*/#include <std ...
- DPDK学习(二)DPDK多进程支持
在DPDK中,多进程支持旨在允许一组DPDK进程以简单的透明方式协同工作,以执行数据包处理或其他工作负载.为了支持此功能,已经对核心的DPDK环境抽象层(EAL)进行了一些增加. EAL已被修改为允许 ...
- DPDK网卡驱动流程总结
本文基于DPDK-16.07.2.Linux 4.4.2分析总结 1 简介 1.1 什么是UIO技术 UIO(Userspace I/O)是运行在用户空间的I/O技术,Linux系统中一般的驱动设备都 ...
- 7. EAL parameters(dpdk参数介绍)
https://doc.dpdk.org/guides-18.11/linux_gsg/linux_eal_parameters.html?highlight=parameters 版本:18.11 ...
- 深入浅出DPDK-第一章
1.DPDK概述 用软件的方式在通用多核处理器上,演绎着数据包处理的新篇章 2.数据包处理的主流硬件平台 1)硬件加速器 2)网络处理器 3)多核处理器 DPDK主要是以IA(Intel archit ...
- 前端开发基础知识汇总
一.HTML 1.前言与常用标签 浏览器 内核 备注 IE Trident IE.猎豹安全.360极速浏览器.百度浏览器 firefox Gecko 可惜这几年已经没落了,打开速度慢.升级频繁.猪一样 ...
- Spring Cloud微服务系统架构的一些简单介绍和使用
Spring Cloud 目录 特征 云原生应用程序 Spring Cloud上下文:应用程序上下文服务 引导应用程序上下文 应用程序上下文层次结构 改变Bootstrap的位置Properties ...
- DPDK官方例程分析(4)-flow_classify
前言 Flow Classify示例应用程序基于转发应用程序的简单框架示例. 它旨在演示使用Flow Classify库API的DPDK转发应用程序的基本组件 flow_classify例子对于DPD ...
- DPDK 跟踪库tracepoint源码实例分析
DPDK笔记 DPDK 跟踪库tracepoint源码实例分析 RToax 2021年4月 注意: 跟踪库 基于DPDK 20.05 DPDK跟踪库:trace library 1. trace流程源 ...
最新文章
- 英特尔在移动市场另辟蹊径
- 西湖大学蓝振忠:预训练语言模型的前沿发展趋势
- OCS 如何在数据库直接给用户增加联络人
- TRDD got lost again
- 每天一道LeetCode-----计算给定范围内所有数的与运算结果
- java学习之—链表(3)
- 手机网站按住放大图片_如何用PPT制作放大镜效果?ppt怎样实现放大镜效果?PPT放大镜效果教学?...
- 【数据集】机器学习数据集汇总(附下载地址)
- (day 51 - 字符转化为数字 ) 剑指 Offer 67. 把字符串转换成整数
- 机器学习笔记(十四):主成分分析法(PCA)(2)
- ThinkPHP统计某个分类的文章数目
- stomp+websocket 集群问题_手把手搭建WebSocket多人在线聊天室
- ThinkingInJava 学习 之 0000001 一切都是对象
- 十大关系数据库SQL注入工具一览
- html倒计时动画,js+css3倒计时动画特效
- go 注册登录 mysql_golang后台 实现用户登录注册
- 如何对图片主题色进行提取
- <2021SC@SDUSC>博客(5)山东大学软件工程应用与实践JPress代码分析(四)
- 【C++】spdlog--log4cxx有点笨重,试一试spdlog
- Unity3D 模型换肤技术