OVS dp支持的action都在do_execute_actions函数中定义,支持的action包括:

OVS_ACTION_ATTR_OUTPUT
OVS_ACTION_ATTR_USERSPACE
OVS_ACTION_ATTR_HASH
OVS_ACTION_ATTR_PUSH_MPLS
OVS_ACTION_ATTR_POP_MPLS
OVS_ACTION_ATTR_PUSH_VLAN
OVS_ACTION_ATTR_POP_VLAN
OVS_ACTION_ATTR_RECIRC
OVS_ACTION_ATTR_SET
OVS_ACTION_ATTR_SET_MASKED
OVS_ACTION_ATTR_SET_TO_MASKED
OVS_ACTION_ATTR_SAMPLE
OVS_ACTION_ATTR_CT

本系列将逐一分析这些action;其中output已在之前介绍datapath主流程时分析过,这里不再赘述。

一、OVS_ACTION_ATTR_USERSPACE

OVS_ACTION_ATTR_USERSPACE的处理函数为output_userspace函数,本节以此函数作为入口进行分析。

1、output_userspace函数

static int output_userspace(struct datapath *dp, struct sk_buff *skb,struct sw_flow_key *key, const struct nlattr *attr,const struct nlattr *actions, int actions_len)
{struct ip_tunnel_info info;struct dp_upcall_info upcall;const struct nlattr *a;int rem;memset(&upcall, 0, sizeof(upcall));upcall.cmd = OVS_PACKET_CMD_ACTION;         //封装upcall对象upcall.mru = OVS_CB(skb)->mru;for (a = nla_data(attr), rem = nla_len(attr); rem > 0;     //获取userspace action相关的信息a = nla_next(a, &rem)) {switch (nla_type(a)) {case OVS_USERSPACE_ATTR_USERDATA:upcall.userdata = a;break;case OVS_USERSPACE_ATTR_PID:upcall.portid = nla_get_u32(a);break;case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {/* Get out tunnel info. */struct vport *vport;vport = ovs_vport_rcu(dp, nla_get_u32(a));if (vport) {int err;upcall.egress_tun_info = &info;err = ovs_vport_get_egress_tun_info(vport, skb,&upcall);if (err)upcall.egress_tun_info = NULL;}break;}case OVS_USERSPACE_ATTR_ACTIONS: {/* Include actions. */upcall.actions = actions;upcall.actions_len = actions_len;break;}} /* End of switch. */}return ovs_dp_upcall(dp, skb, key, &upcall);    //upcall
}

2、ovs_dp_upcall函数

/*
 * Queue @skb to the userspace listener identified by upcall_info->portid.
 *
 * GSO packets go through queue_gso_packets(), everything else through
 * queue_userspace_packet().
 *
 * Returns 0 on success, -ENOTCONN when no port id is set, or the error
 * reported by the queueing helper. Every failure increments the per-CPU
 * n_lost counter.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		const struct sw_flow_key *key,
		const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	/* Account for the lost upcall. */
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}

3、queue_userspace_packet函数

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,const struct sw_flow_key *key,const struct dp_upcall_info *upcall_info)
{struct ovs_header *upcall;struct sk_buff *nskb = NULL;struct sk_buff *user_skb = NULL; /* to be queued to userspace */struct nlattr *nla;struct genl_info info = {#ifdef HAVE_GENLMSG_NEW_UNICAST.dst_sk = ovs_dp_get_net(dp)->genl_sock,
#endif.snd_portid = upcall_info->portid,};size_t len;unsigned int hlen;int err, dp_ifindex;dp_ifindex = get_dpifindex(dp);if (!dp_ifindex)return -ENODEV;if (skb_vlan_tag_present(skb)) {nskb = skb_clone(skb, GFP_ATOMIC);if (!nskb)return -ENOMEM;nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto, skb_vlan_tag_get(nskb));if (!nskb)return -ENOMEM;vlan_set_tci(nskb, 0);  //为什么要把tci置0?skb = nskb;}if (nla_attr_size(skb->len) > USHRT_MAX) {err = -EFBIG;goto out;}/* Complete checksum if needed */if (skb->ip_summed == CHECKSUM_PARTIAL &&(err = skb_checksum_help(skb)))goto out;/* Older versions of OVS user space enforce alignment of the last* Netlink attribute to NLA_ALIGNTO which would require extensive* padding logic. Only perform zerocopy if padding is not required.*/if (dp->user_features & OVS_DP_F_UNALIGNED)hlen = skb_zerocopy_headlen(skb);elsehlen = skb->len;len = upcall_msg_size(upcall_info, hlen);user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);        //创建upcall消息对象if (!user_skb) {err = -ENOMEM;goto out;}upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,    //dp_packet_genl_family 和 upcall_info->cmd确定处理函数0, upcall_info->cmd);upcall->dp_ifindex = dp_ifindex;err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);  //upcall信息对象添加keyBUG_ON(err);if (upcall_info->userdata)    __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,   //upcall信息对象添加userdatanla_len(upcall_info->userdata),nla_data(upcall_info->userdata));if (upcall_info->egress_tun_info) {nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);  //upcall信息对象添加egress_tun_infoerr = ovs_nla_put_egress_tunnel_key(user_skb,upcall_info->egress_tun_info,upcall_info->egress_tun_opts);BUG_ON(err);nla_nest_end(user_skb, nla);}if (upcall_info->actions_len) {nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);  //upcall信息对象添加actionserr = ovs_nla_put_actions(upcall_info->actions,upcall_info->actions_len,user_skb);if (!err)nla_nest_end(user_skb, nla);elsenla_nest_cancel(user_skb, 
nla);}/* Add OVS_PACKET_ATTR_MRU */if (upcall_info->mru) {if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,upcall_info->mru)) {err = -ENOBUFS;goto out;}pad_packet(dp, user_skb);}/* Only reserve room for attribute header, packet data is added* in skb_zerocopy()*/if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {err = -ENOBUFS;goto out;}nla->nla_len = nla_attr_size(skb->len);err = skb_zerocopy(user_skb, skb, skb->len, hlen);    //upcall信息对象添加报文if (err)goto out;/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */pad_packet(dp, user_skb);((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);    //发送netlink报文user_skb = NULL;
out:if (err)skb_tx_error(skb);kfree_skb(user_skb);kfree_skb(nskb);return err;
}

到此可以看到userspace action和精确流表未匹配导致的upcall在处理流程上是比较一致的,两者都是通过调用ovs_dp_upcall函数实现信息发送到用户态程序。upcall处理线程是如何处理的不在本篇分析,将在后续给出分析。

通过userspace能够实现什么功能呢? 现在还想不出,等分析upcall处理后,再回过头来回答这个问题。

二、OVS_ACTION_ATTR_HASH

本节分析OVS_ACTION_ATTR_HASH action,该action的处理函数为execute_hash函数

1、execute_hash函数

/*
 * Handle OVS_ACTION_ATTR_HASH: derive a hash from the packet hash and the
 * action's hash_basis and store it in key->ovs_flow_hash. The stored value
 * is never 0 (a zero result is replaced by 0x1).
 */
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
		const struct nlattr *attr)
{
	struct ovs_action_hash *cfg = nla_data(attr);
	u32 pkt_hash;
	u32 mixed;

	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
	pkt_hash = skb_get_hash(skb);
	mixed = jhash_1word(pkt_hash, cfg->hash_basis);

	key->ovs_flow_hash = mixed ? mixed : 0x1;
}

该action仅对key的ovs_flow_hash成员变量进行了修改,从该变量的使用地方逆推,最终是queue_userspace_packet会使用,该函数是把报文发送给用户态进程,本次就看下queue_userspace_packet函数是如何使用到该成员变量的。

2、queue_userspace_packet函数

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,const struct sw_flow_key *key,const struct dp_upcall_info *upcall_info)
{struct ovs_header *upcall;struct sk_buff *nskb = NULL;struct sk_buff *user_skb = NULL; /* to be queued to userspace */struct nlattr *nla;struct genl_info info = {#ifdef HAVE_GENLMSG_NEW_UNICAST.dst_sk = ovs_dp_get_net(dp)->genl_sock,
#endif.snd_portid = upcall_info->portid,};size_t len;unsigned int hlen;int err, dp_ifindex;dp_ifindex = get_dpifindex(dp);if (!dp_ifindex)return -ENODEV;if (skb_vlan_tag_present(skb)) {nskb = skb_clone(skb, GFP_ATOMIC);if (!nskb)return -ENOMEM;nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto, skb_vlan_tag_get(nskb));if (!nskb)return -ENOMEM;vlan_set_tci(nskb, 0);  //为什么要把tci置0?skb = nskb;}if (nla_attr_size(skb->len) > USHRT_MAX) {err = -EFBIG;goto out;}/* Complete checksum if needed */if (skb->ip_summed == CHECKSUM_PARTIAL &&(err = skb_checksum_help(skb)))goto out;/* Older versions of OVS user space enforce alignment of the last* Netlink attribute to NLA_ALIGNTO which would require extensive* padding logic. Only perform zerocopy if padding is not required.*/if (dp->user_features & OVS_DP_F_UNALIGNED)hlen = skb_zerocopy_headlen(skb);elsehlen = skb->len;len = upcall_msg_size(upcall_info, hlen);user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);        //创建upcall消息对象if (!user_skb) {err = -ENOMEM;goto out;}upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,    //dp_packet_genl_family 和 upcall_info->cmd确定处理函数0, upcall_info->cmd);upcall->dp_ifindex = dp_ifindex;err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);  //upcall信息对象添加key,该函数最终会用到ovs_flow_hashBUG_ON(err);if (upcall_info->userdata)    __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,   //upcall信息对象添加userdatanla_len(upcall_info->userdata),nla_data(upcall_info->userdata));if (upcall_info->egress_tun_info) {nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);  //upcall信息对象添加egress_tun_infoerr = ovs_nla_put_egress_tunnel_key(user_skb,upcall_info->egress_tun_info,upcall_info->egress_tun_opts);BUG_ON(err);nla_nest_end(user_skb, nla);}if (upcall_info->actions_len) {nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);  //upcall信息对象添加actionserr = ovs_nla_put_actions(upcall_info->actions,upcall_info->actions_len,user_skb);if (!err)nla_nest_end(user_skb, 
nla);elsenla_nest_cancel(user_skb, nla);}/* Add OVS_PACKET_ATTR_MRU */if (upcall_info->mru) {if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,upcall_info->mru)) {err = -ENOBUFS;goto out;}pad_packet(dp, user_skb);}/* Only reserve room for attribute header, packet data is added* in skb_zerocopy()*/if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {err = -ENOBUFS;goto out;}nla->nla_len = nla_attr_size(skb->len);err = skb_zerocopy(user_skb, skb, skb->len, hlen);    //upcall信息对象添加报文if (err)goto out;/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */pad_packet(dp, user_skb);((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);    //发送netlink报文user_skb = NULL;
out:if (err)skb_tx_error(skb);kfree_skb(user_skb);kfree_skb(nskb);return err;
}

3、ovs_nla_put_key函数

/*
 * Emit a flow key as a nested netlink attribute of type @attr into @skb.
 *
 * Returns 0 on success, -EMSGSIZE if the nest cannot be opened, or the
 * error returned by __ovs_nla_put_key() (the nest is left open in that
 * case).
 */
int ovs_nla_put_key(const struct sw_flow_key *swkey,
		const struct sw_flow_key *output, int attr, bool is_mask,
		struct sk_buff *skb)
{
	struct nlattr *nest = nla_nest_start(skb, attr);
	int rc;

	if (!nest)
		return -EMSGSIZE;

	rc = __ovs_nla_put_key(swkey, output, is_mask, skb);
	if (!rc)
		nla_nest_end(skb, nest);

	return rc;
}

4、__ovs_nla_put_key函数

static int __ovs_nla_put_key(const struct sw_flow_key *swkey,const struct sw_flow_key *output, bool is_mask,struct sk_buff *skb)
{struct ovs_key_ethernet *eth_key;struct nlattr *nla, *encap;if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))goto nla_put_failure;if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))     //使用到该变量goto nla_put_failure;if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))goto nla_put_failure;if ((swkey->tun_key.u.ipv4.dst || is_mask)) {const void *opts = NULL;if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,swkey->tun_opts_len))goto nla_put_failure;}if (swkey->phy.in_port == DP_MAX_PORTS) {if (is_mask && (output->phy.in_port == 0xffff))if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))goto nla_put_failure;

三、OVS_ACTION_ATTR_PUSH_VLAN

本节分析OVS_ACTION_ATTR_PUSH_VLAN action,该action的处理函数为push_vlan。

1、push_vlan函数

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,const struct ovs_action_push_vlan *vlan)
{if (skb_vlan_tag_present(skb))     //如果报文已经包含vlaninvalidate_flow_key(key); //设置key的以太报文类型为0elsekey->eth.tci = vlan->vlan_tci;   //设置key的报文tci值return skb_vlan_push(skb, vlan->vlan_tpid, //添加vlan信息ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}

2、skb_vlan_push函数

#define skb_vlan_push rpl_skb_vlan_push
/*
 * Push a VLAN tag onto @skb.
 *
 * If @skb already carries an out-of-band tag, that tag is first written
 * into the packet data; the new tag (@vlan_proto/@vlan_tci) is then stored
 * out-of-band through __vlan_hwaccel_put_tag(), which does not modify the
 * packet data.
 *
 * Returns 0 on success or the error from __vlan_insert_tag().
 */
int rpl_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
	if (skb_vlan_tag_present(skb)) {
		/* Distance between skb->data and the MAC header. */
		unsigned int offset = skb->data - skb_mac_header(skb);
		int err;

		/* __vlan_insert_tag expect skb->data pointing to mac header.
		 * So change skb->data before calling it and change back to
		 * original position later
		 */
		__skb_push(skb, offset);
		err = __vlan_insert_tag(skb, skb->vlan_proto,
				skb_vlan_tag_get(skb));
		if (err)
			return err;

		/* The L2 header grew by one VLAN header (VLAN_HLEN bytes). */
		skb->mac_len += VLAN_HLEN;

		/* Restore skb->data to the same offset from the (now moved)
		 * MAC header.
		 */
		__skb_pull(skb, offset);

		/* Add the checksum of the VLAN_HLEN bytes at
		 * skb->data + 2 * ETH_ALEN to skb->csum.
		 */
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
	}

	/* Record the new tag in the skb without touching packet data. */
	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
	return 0;
}

3、__vlan_insert_tag函数

/* Note: this wrapper drops the proto argument; the function below always
 * writes ETH_P_8021Q.
 */
#define __vlan_insert_tag(skb, proto, tci) rpl_vlan_insert_tag(skb, tci)
/*
 * Insert a VLAN tag with TCI @vlan_tci into the packet data of @skb.
 *
 * Expects skb->data to point at the MAC header. Returns 0 on success or
 * -ENOMEM if the headroom cannot be made available.
 */
static inline int rpl_vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
{
	struct vlan_ethhdr *veth;

	/* Make sure there are VLAN_HLEN writable bytes of headroom. */
	if (skb_cow_head(skb, VLAN_HLEN) < 0)
		return -ENOMEM;

	/* Move skb->data back by VLAN_HLEN to make room for the tag. */
	veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);

	/* Move the mac addresses to the beginning of the new header. */
	memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
	skb->mac_header -= VLAN_HLEN;

	/* first, the ethernet type.
	 *
	 * veth points at the start of the new header, i.e. at the
	 * destination MAC. struct vlan_ethhdr is declared outside this
	 * excerpt; in the Linux headers it starts with h_dest and h_source,
	 * so h_vlan_proto sits right after the 2 * ETH_ALEN address bytes.
	 */
	veth->h_vlan_proto = htons(ETH_P_8021Q);

	/* now, the TCI */
	veth->h_vlan_TCI = htons(vlan_tci);

	return 0;
}

push vlan动作就是如果报文已经有vlan,那么先修改报文的数据,添加vlan头,然后再设置skb->vlan_tci,该vlan头由硬件在发送时添加到报文中。

四、OVS_ACTION_ATTR_POP_VLAN

本节分析OVS_ACTION_ATTR_POP_VLAN action的处理函数pop_vlan。

1、pop_vlan函数

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{int err;err = skb_vlan_pop(skb);if (skb_vlan_tag_present(skb))        //如果还存在vlan,则设置key的报文类型为0invalidate_flow_key(key);elsekey->eth.tci = 0;      //设置key的tci为0return err;
}

2、skb_vlan_pop函数

#define skb_vlan_pop rpl_skb_vlan_pop
int rpl_skb_vlan_pop(struct sk_buff *skb)
{u16 vlan_tci;__be16 vlan_proto;int err;if (likely(skb_vlan_tag_present(skb))) {    //如果skb的vlan_tci非0,直接设置该值为0skb->vlan_tci = 0;    } else {if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&skb->protocol != htons(ETH_P_8021AD)) ||skb->len < VLAN_ETH_HLEN))return 0;err = __skb_vlan_pop(skb, &vlan_tci);    //skb报文pop vlan,修改报文数据if (err)return err;}/* move next vlan tag to hw accel tag */if (likely((skb->protocol != htons(ETH_P_8021Q) &&             //qinq场景skb->protocol != htons(ETH_P_8021AD)) ||skb->len < VLAN_ETH_HLEN))return 0;vlan_proto = htons(ETH_P_8021Q);err = __skb_vlan_pop(skb, &vlan_tci);     //需要进一步pop vlanif (unlikely(err))return err;__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);   //设置skb的vlan_tci值return 0;
}

3、__skb_vlan_pop函数

/*
 * Remove one VLAN header from the packet data of @skb and return its TCI
 * (host byte order) through @vlan_tci.
 *
 * Returns 0 on success or the error from skb_ensure_writable(); in both
 * cases skb->data is moved forward again by the offset it had from the MAC
 * header on entry.
 */
static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
	struct vlan_hdr *vhdr;
	unsigned int offset = skb->data - skb_mac_header(skb);
	int err;

	/* Point skb->data at the MAC header. */
	__skb_push(skb, offset);
	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
	if (unlikely(err))
		goto pull;

	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);

	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
	*vlan_tci = ntohs(vhdr->h_vlan_TCI);

	/* Shift the two MAC addresses forward by VLAN_HLEN bytes ... */
	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	/* ... and advance skb->data by the same amount. */
	__skb_pull(skb, VLAN_HLEN);

	/* Set skb->protocol from the encapsulated header (per the article). */
	vlan_set_encap_proto(skb, vhdr);
	skb->mac_header += VLAN_HLEN;

	/* Keep the network header at least ETH_HLEN past the MAC header. */
	if (skb_network_offset(skb) < ETH_HLEN)
		skb_set_network_header(skb, ETH_HLEN);

	/* Recompute the L2 length (network_header - mac_header, per the
	 * article).
	 */
	skb_reset_mac_len(skb);
pull:
	/* Restore skb->data to its original offset from the MAC header. */
	__skb_pull(skb, offset);

	return err;
}

pop vlan,如果报文vlan已经解析,即放在skb的vlan_tci变量,那么直接把该变量赋值为0,key的vlan_tci设置为0即可;否则的话就需要修改skb的报文数据,软件最多会pop两个vlan头,硬件还可以剥一个头。

五、OVS_ACTION_ATTR_SET

本节分析OVS_ACTION_ATTR_SET action的处理函数execute_set_action函数。

1、execute_set_action函数

static int execute_set_action(struct sk_buff *skb,struct sw_flow_key *flow_key,const struct nlattr *a)
{/* Only tunnel set execution is supported without a mask. */if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {struct ovs_tunnel_info *tun = nla_data(a);ovs_skb_dst_drop(skb);                                     ovs_dst_hold((struct dst_entry *)tun->tun_dst);            //为什么要调用空函数ovs_skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);    //设置skb的tun_dst成员对象,这个信息在vxlan报文发包的时候使用return 0;}return -EINVAL;
}

该action的处理函数非常简单,仅设置了一个参数。 我们来看看是如何被使用到的,以vxlan隧道为例,我们从vxlan端口的send函数(vxlan_xmit)入手来看。

2、vxlan_xmit函数

#define vxlan_xmit rpl_vxlan_xmit
netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb)
{struct net_device *dev = skb->dev;struct vxlan_dev *vxlan = netdev_priv(dev);const struct ip_tunnel_info *info;info = skb_tunnel_info(skb);    //得到tunnel信息,即execute_set_action函数设置的内容skb_reset_mac_header(skb);if ((vxlan->flags & VXLAN_F_PROXY))goto out;if (vxlan->flags & VXLAN_F_COLLECT_METADATA &&info && info->mode & IP_TUNNEL_INFO_TX) {vxlan_xmit_one(skb, dev, NULL, false);return NETDEV_TX_OK;}
out:pr_warn("vxlan: unsupported flag set %x", vxlan->flags);kfree_skb(skb);return NETDEV_TX_OK;
}

vxlan报文发送流程,不在这里分析。 通过分析,该action的作用是封装报文,通过隧道发送报文。 在dp的层面,只有一个tunnel端口(每种tunnel隧道一个),而其他类型的端口可以是多个的,从这里也可以看到tunnel端口只是配置信息不同而已,所以只需要一个端口,配置信息在action中提供。

六、OVS_ACTION_ATTR_RECIRC

本节分析OVS_ACTION_ATTR_RECIRC action的处理函数execute_recirc。

1、execute_recirc函数

static int execute_recirc(struct datapath *dp, struct sk_buff *skb,struct sw_flow_key *key,const struct nlattr *a, int rem)
{struct deferred_action *da;if (!is_flow_key_valid(key)) {     //如果key为valid,需要重新生成keyint err;err = ovs_flow_key_update(skb, key);    //重新生成keyif (err)return err;}BUG_ON(!is_flow_key_valid(key));if (!nla_is_last(a, rem)) {    //如果action不是最后一个,则需要克隆skb/* Recirc action is the not the last action* of the action list, need to clone the skb.*/skb = skb_clone(skb, GFP_ATOMIC);/* Skip the recirc action when out of memory, but* continue on with the rest of the action list.*/if (!skb)return 0;}da = add_deferred_actions(skb, key, NULL);    //添加deferred actionif (da) {da->pkt_key.recirc_id = nla_get_u32(a);} else {kfree_skb(skb);if (net_ratelimit())pr_warn("%s: deferred action limit reached, drop recirc action\n",ovs_dp_name(dp));}return 0;
}

2、add_deferred_actions函数

/* Return queue entry if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,const struct sw_flow_key *key,const struct nlattr *attr)
{struct action_fifo *fifo;struct deferred_action *da;fifo = this_cpu_ptr(action_fifos);da = action_fifo_put(fifo);        //添加一个deferred_actionif (da) {da->skb = skb;da->actions = attr;             //recirc action,actions为空da->pkt_key = *key;}return da;
}

3、action_fifo_put函数

/*
 * Reserve the next slot of @fifo and advance its head.
 * Returns NULL once head has reached DEFERRED_ACTION_FIFO_SIZE - 1.
 */
static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	struct deferred_action *slot = NULL;

	if (fifo->head < DEFERRED_ACTION_FIFO_SIZE - 1)
		slot = &fifo->fifo[fifo->head++];

	return slot;
}

从上面可知,OVS_ACTION_ATTR_RECIRC action就是在action_fifos全局对象中添加一个deferred_action。 这些actions在什么时候被使用呢? 答案是ovs_execute_actions函数。

4、ovs_execute_actions函数

int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,const struct sw_flow_actions *acts,struct sw_flow_key *key)
{int level = this_cpu_read(exec_actions_level);int err;if (unlikely(level >= EXEC_ACTIONS_LEVEL_LIMIT)) {if (net_ratelimit())pr_warn("%s: packet loop detected, dropping.\n",ovs_dp_name(dp));kfree_skb(skb);return -ELOOP;}this_cpu_inc(exec_actions_level);err = do_execute_actions(dp, skb, key,acts->actions, acts->actions_len);if (!level)process_deferred_actions(dp);    //执行deferred actions, 前提条件是level为0,即第一次执行该函数时。可以把该action推迟到最后执行。this_cpu_dec(exec_actions_level);/* This return status currently does not reflect the errors* encounted during deferred actions execution. Probably needs to* be fixed in the future.*/return err;
}

5、process_deferred_actions函数

/*
 * Drain this CPU's deferred-action FIFO.
 *
 * Entries with an action list are executed with do_execute_actions();
 * entries without one (queued by execute_recirc()) are passed to
 * ovs_dp_process_packet(). The FIFO is re-initialised afterwards.
 */
static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO in case there is no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finishing executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;

		if (actions)
			do_execute_actions(dp, skb, key, actions,
					nla_len(actions));
		else
			/* Recirculation: the packet is processed again (flow
			 * lookup onwards, per the article); compared with the
			 * first pass the key now carries the recirc_id.
			 */
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet.  */
	action_fifo_init(fifo);
}

OVS_ACTION_ATTR_RECIRC action提供了重复处理的功能,但是这样的功能价值是什么? 现在还没想明白。

七、OVS_ACTION_ATTR_SET_MASKED 和 OVS_ACTION_ATTR_SET_TO_MASKED

本节分析OVS_ACTION_ATTR_SET_MASKED 和 OVS_ACTION_ATTR_SET_TO_MASKED action,处理函数为execute_masked_set_action函数。

1、execute_masked_set_action函数

static int execute_masked_set_action(struct sk_buff *skb,struct sw_flow_key *flow_key,const struct nlattr *a)
{int err = 0;switch (nla_type(a)) {case OVS_KEY_ATTR_PRIORITY:OVS_SET_MASKED(skb->priority, nla_get_u32(a),     //报文优先级设置, 用于tc控制*get_mask(a, u32 *));flow_key->phy.priority = skb->priority;break;case OVS_KEY_ATTR_SKB_MARK:OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));   //报文mark设置, iptables会使用flow_key->phy.skb_mark = skb->mark;break;case OVS_KEY_ATTR_TUNNEL_INFO:/* Masked data not supported for tunnel. */err = -EINVAL;break;case OVS_KEY_ATTR_ETHERNET:err = set_eth_addr(skb, flow_key, nla_data(a),     //设置源mac、目的macget_mask(a, struct ovs_key_ethernet *));break;case OVS_KEY_ATTR_IPV4:err = set_ipv4(skb, flow_key, nla_data(a),      //设置IPV4字段,源IP、目的IP、tos、ttl;get_mask(a, struct ovs_key_ipv4 *));break;case OVS_KEY_ATTR_IPV6:err = set_ipv6(skb, flow_key, nla_data(a),      //设置IPV6相关字段get_mask(a, struct ovs_key_ipv6 *));break;case OVS_KEY_ATTR_TCP:err = set_tcp(skb, flow_key, nla_data(a),      //设置tcp字段,修改源端口和目的端口get_mask(a, struct ovs_key_tcp *));break;case OVS_KEY_ATTR_UDP:err = set_udp(skb, flow_key, nla_data(a),        //设置udp字段,修改源端口和目的端口get_mask(a, struct ovs_key_udp *));break;case OVS_KEY_ATTR_SCTP:err = set_sctp(skb, flow_key, nla_data(a),get_mask(a, struct ovs_key_sctp *));break;case OVS_KEY_ATTR_MPLS:err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,__be32 *));break;case OVS_KEY_ATTR_CT_STATE:case OVS_KEY_ATTR_CT_ZONE:case OVS_KEY_ATTR_CT_MARK:case OVS_KEY_ATTR_CT_LABELS:err = -EINVAL;break;}return err;
}

2、set_eth_addr函数

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,const struct ovs_key_ethernet *key,const struct ovs_key_ethernet *mask)
{int err;err = skb_ensure_writable(skb, ETH_HLEN);if (unlikely(err))return err;skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,mask->eth_src);ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,mask->eth_dst);ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);return 0;
}

3、set_ipv4函数

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,const struct ovs_key_ipv4 *key,const struct ovs_key_ipv4 *mask)
{struct iphdr *nh;__be32 new_addr;int err;err = skb_ensure_writable(skb, skb_network_offset(skb) +sizeof(struct iphdr));if (unlikely(err))return err;nh = ip_hdr(skb);/* Setting an IP addresses is typically only a side effect of* matching on them in the current userspace implementation, so it* makes sense to check if the value actually changed.*/if (mask->ipv4_src) {new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);if (unlikely(new_addr != nh->saddr)) {set_ip_addr(skb, nh, &nh->saddr, new_addr);flow_key->ipv4.addr.src = new_addr;}}if (mask->ipv4_dst) {new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);if (unlikely(new_addr != nh->daddr)) {set_ip_addr(skb, nh, &nh->daddr, new_addr);flow_key->ipv4.addr.dst = new_addr;}}if (mask->ipv4_tos) {ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);flow_key->ip.tos = nh->tos;}if (mask->ipv4_ttl) {set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);flow_key->ip.ttl = nh->ttl;}return 0;
}

4、set_udp函数

static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,const struct ovs_key_udp *key,const struct ovs_key_udp *mask)
{struct udphdr *uh;__be16 src, dst;int err;err = skb_ensure_writable(skb, skb_transport_offset(skb) +sizeof(struct udphdr));if (unlikely(err))return err;uh = udp_hdr(skb);/* Either of the masks is non-zero, so do not bother checking them. */src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {if (likely(src != uh->source)) {set_tp_port(skb, &uh->source, src, &uh->check);flow_key->tp.src = src;}if (likely(dst != uh->dest)) {set_tp_port(skb, &uh->dest, dst, &uh->check);flow_key->tp.dst = dst;}if (unlikely(!uh->check))uh->check = CSUM_MANGLED_0;} else {uh->source = src;uh->dest = dst;flow_key->tp.src = src;flow_key->tp.dst = dst;}skb_clear_hash(skb);return 0;
}

本节分析的action的作用是修改skb报文,通过key和mask两个值可以修改任意sw_flow_key结构体定义的字段。基于该框架,可以任意修改报文内容。例如arp代答等等。

八、OVS_ACTION_ATTR_SAMPLE

本节分析OVS_ACTION_ATTR_SAMPLE action的处理函数sample。

1、sample函数

static int sample(struct datapath *dp, struct sk_buff *skb,struct sw_flow_key *key, const struct nlattr *attr,const struct nlattr *actions, int actions_len)
{const struct nlattr *acts_list = NULL;const struct nlattr *a;int rem;for (a = nla_data(attr), rem = nla_len(attr); rem > 0;a = nla_next(a, &rem)) {u32 probability;switch (nla_type(a)) {case OVS_SAMPLE_ATTR_PROBABILITY:      //提供概率设置probability = nla_get_u32(a);if (!probability || prandom_u32() > probability)return 0;break;case OVS_SAMPLE_ATTR_ACTIONS:       //提供对采样报文的处理acts_list = a;break;}}rem = nla_len(acts_list);a = nla_data(acts_list);/* Actions list is empty, do nothing */if (unlikely(!rem))return 0;/* The only known usage of sample action is having a single user-space* action. Treat this usage as a special case.* The output_userspace() should clone the skb to be sent to the* user space. This skb will be consumed by its caller.*/if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&nla_is_last(a, rem)))return output_userspace(dp, skb, key, a, actions, actions_len);   //发送到用户态,相比OVS_ACTION_ATTR_USERSPACE,提供概率的能力skb = skb_clone(skb, GFP_ATOMIC);if (!skb)/* Skip the sample action when out of memory. */return 0;if (!add_deferred_actions(skb, key, a)) {   //放到fifo数组中,在最后处理if (net_ratelimit())pr_warn("%s: deferred actions limit reached, dropping sample action\n",ovs_dp_name(dp));kfree_skb(skb);}return 0;
}

output_userspace在前几篇已经分析过,会把报文上传到用户态,用户态如何处理后续分析。add_deferred_actions会把报文放在fifo数组中,在报文处理的最后时刻处理,看ovs_execute_actions函数。

2、ovs_execute_actions函数

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,const struct sw_flow_actions *acts,struct sw_flow_key *key)
{int level = this_cpu_read(exec_actions_level);int err;if (unlikely(level >= EXEC_ACTIONS_LEVEL_LIMIT)) {if (net_ratelimit())pr_warn("%s: packet loop detected, dropping.\n",ovs_dp_name(dp));kfree_skb(skb);return -ELOOP;}this_cpu_inc(exec_actions_level);err = do_execute_actions(dp, skb, key,acts->actions, acts->actions_len);if (!level)    //do_execute_actions如果循环进入此函数,那么level非零,不会进入process_deferred_actions(dp);this_cpu_dec(exec_actions_level);/* This return status currently does not reflect the errors* encounted during deferred actions execution. Probably needs to* be fixed in the future.*/return err;
}

我们再看一下process_deferred_actions函数是怎么处理的。

3、process_deferred_actions函数

/*
 * Drain this CPU's deferred-action FIFO.
 *
 * Entries with an action list (queued by sample()) are executed with
 * do_execute_actions(); entries without one (queued by execute_recirc())
 * are passed to ovs_dp_process_packet(). The FIFO is re-initialised
 * afterwards.
 */
static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO in case there is no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finishing executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;

		if (actions)
			/* Deferred sample actions take this branch. */
			do_execute_actions(dp, skb, key, actions,
					nla_len(actions));
		else
			/* Recirculation: process the packet again. */
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet.  */
	action_fifo_init(fifo);
}

sample总体提供两个功能,1)概率性地发送报文到用户态;2)两次处理报文的能力(自定义处理动作),为什么提供这个能力? 作用是什么? 希望通过进一步分析,能够回答这个问题。

原文链接:https://blog.csdn.net/one_clouder/article/details/52418570

OVS datapath之action分析(十九)相关推荐

  1. OVS vswitchd启动(三十九)

    ovs vswitchd的启动 vswitchd启动代码可参考ovs-vswitchd.c的main函数,其中最重要的两个函数是bridge_run以及netdev_run bridge_run vo ...

  2. 【转】ABP源码分析十九:Auditing

    审计跟踪(也叫审计日志)是与安全相关的按照时间顺序的记录,它们提供了活动序列的文档证据,这些活动序列可以在任何时间影响一个特定的操作. AuditInfo:定义如下图中需要被Audit的信息. Aud ...

  3. OVS datapath结构图(四十六)

  4. Netty in Action (十九) 第九章节 单元测试

    本章内容包括: 1)单元测试 2)EmbeddedChannel的说明 3)使用EmbeddedChannel测试ChannelHandler 对于一个Netty应用来说,ChannelHandler ...

  5. 鸟哥的Linux私房菜(基础篇)- 第十九章、认识与分析登录文件

    第十九章.认识与分析登录文件 最近升级日期:2009/09/14 当你的 Linux 系统出现不明原因的问题时,很多人都告诉你,你要查阅一下登录文件才能够知道系统出了什么问题了,所以说,了解登录文件是 ...

  6. SAP UI5 应用开发教程之三十九 - SAP UI5 应用出现白屏的一些常见错误和分析方法分享试读版

    一套适合 SAP UI5 初学者循序渐进的学习教程 教程目录 SAP UI5 本地开发环境的搭建 SAP UI5 应用开发教程之一:Hello World SAP UI5 应用开发教程之二:SAP U ...

  7. python数据挖掘学习笔记】十九.鸢尾花数据集可视化、线性回归、决策树花样分析

    #2018-04-05 16:57:26 April Thursday the 14 week, the 095 day SZ SSMR python数据挖掘学习笔记]十九.鸢尾花数据集可视化.线性回 ...

  8. osgEarth的Rex引擎原理分析(七十九)如何加载百度、高德、谷歌、微软的在线地图

    目标:(七十八)中的问题155 瓦片生成后,就是一堆图片.怎么对这堆图片进行编号,是目前主流互联网地图商分歧最大的地方.总结起来分为四个流派: 谷歌XYZ:Z表示缩放层级,Z=zoom:XY的原点在左 ...

  9. BetaFlight模块设计之二十九:滤波模块分析

    BetaFlight模块设计之二十九:滤波模块分析 滤波模块 滤波类型 1. slewFilter 2. simpleLowpassFilter 3. laggedMovingAverage 4. p ...

最新文章

  1. 今天開始學習silverlight了
  2. wordpress导航页采用分类目录排序
  3. Drupal 为前台(Front page)页面添加区域(Regions)
  4. 在Visual Studio Code中配置GO开发环境
  5. 平均要取多少个(0,1)中的随机数才能让和超过1
  6. mac查看进程 总是忘记
  7. mysql dba环境验收_面对一个全新的环境,作为一个Mysql DBA,首先应该了解什么?
  8. 从零开始学android:Activity初步
  9. matlab改进平方根算法,改进平方根请教
  10. html求相关系数,关于pearson相关系数的意义
  11. LINUX centos7.6修改静态IP与配置参数NM_CONTROLLED
  12. 流利阅读2019.1.4 Secrets of the booming beauty business
  13. 程序员用python给了女友一个七夕惊喜!
  14. 网络安全:namp扫描工具
  15. 插上耳机没声音 程序包ID:Audioplaybackdiagnostic 错误代码:0x80070002 源:引擎 上下文:升级
  16. UI设计师如何脱颖而出 面试过程中要注意什么
  17. 海德汉仿真软件+海德汉西门子视频教程
  18. VCS IDEA没有Enable Version Control Intergration
  19. 大家好!我是happy_HuHu
  20. 本地开发申请ssl证书并在宝塔上给网站配置ssl

热门文章

  1. 学python需要什么文化基础-中国大学MOOC的APP2020Python编程基础答案
  2. python训练营免费领取-马哥教育官网-专业Linux培训班,Python培训机构
  3. python恶搞-如何用python和vbs恶搞基友?
  4. python下载文件到指定文件夹-Python 获取指定文件夹下的目录和文件的实现
  5. python升级版本命令-pythonpip命令版本过低问题版本升级问题
  6. 儿童python编程入门-天津少儿编程Python入门
  7. spring中resource设计与实现
  8. MP4文件格式的解析,以及MP4文件的分割算法
  9. 题目1169:比较奇偶数个数
  10. JavaScript标准参考教材(alpha)--笔记