前面我们介绍到ovn-northd服务主要是将Northbound DB存储的逻辑拓扑翻译为chassis和Logical Flow,今天我们从代码的角度看一下他是怎么实现的。

最近发现了一篇不错的文章,点击这里查看part 1part 2

int main(int argc, char *argv[])
{
    //创建到nb db的连接,连接维护的数据记录在ovn-nb.ovsschema
    //注意函数ovsdb_idl_create的第三个参数,表示默认监控所有更新
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
                ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    //I think... maybe... 分别忽略sb和hv配置更新的提示
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    //创建到sb db的连接,连接维护的数据记录在ovn-sb.ovsschema
    //注意函数ovsdb_idl_create的第三个参数,表示默认都不管
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
                ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    //当上面的ovsdb_idl_create的第三个参数是false的时候,需要调用这个接口
    //用来确认带有sbrec_table_sb_global的类会被复制
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    //用来确认带有sbrec_sb_global_col_nb_cfg的列会被复制,不提示
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    //确认Logical Flow表会被复制,确认该表里面的datapath、
    //pipeline、table_id、match、action会被复制并且不提示。
    //以下这些操作都是类似这里的操作,至于这些表和col可以参照前面介绍。
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                &sbrec_mac_binding_col_logical_port);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            //加载nbdb的数据
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            //加载sbdb的数据
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        //将ctx->ovnnb_txn的各项数据通过翻译加载到ctx->ovnsb_txn中
        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        //主要更新端口信息
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        //监听服务,主要是处理各个ovn-controller的连接,通过json rpc的方式通信
        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        //提交ctx的信息到nbdb和sbdb
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
static void
ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    //将nbdb数据库中的logical switches、routers和datapath的对应关系
    //更新到sb表Datapath_Binding中,并且存储在struct ovn_datapath
    build_datapaths(ctx, &datapaths);
    //将nbdb数据库中的logical switch ports
    //更新到sb表Port_Binding中,并且存储在struct ovn_port
    build_ports(ctx, &datapaths, &ports);
    //用来创建管理IP和MAC的表,比如配置了IP或者dynamic以及MAC时
    build_ipam(&datapaths, &ports);
    //基于nbdb的内容生成的Logical_Flow和Multicast_Group
    //生成的各级流表前面一篇文章都有介绍
    build_lflows(ctx, &datapaths, &ports);

    //Address_Set表更新
    sync_address_sets(ctx);
}
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    //更新Port_Binding表chassis列,不为空时意味着nbdb中将这个端口设置为UP
    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}

Datapath_Binding、Port_Binding、MAC_Binding的创建都还比较简单,所以就不看了,主要看一下逻辑流表的生成,首先会创建逻辑交换网元的逻辑流表,然后创建逻辑路由网元的逻辑流表,最后会查看目前的逻辑流表,然后对比变化,将更改的逻辑流表写入数据库中,我们重点关注一下逻辑交换和逻辑路由两个网元的逻辑流表。

static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    //创建ingress和egress的pre-ACL和ACL表,对应着ingress的表3-9和egress的0-6
    //这里面的每个逻辑都和下面类似,我们就不每个都细看了
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        build_pre_acls(od, lflows);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows);
        build_flows(od, lflows);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    //ingress 表0 100优先级的入口控制
    HMAP_FOR_EACH (od, key_node, datapaths) {
        //带VLAN标签的报文丢弃
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");
        //源地址多播或者广播的报文丢弃
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");
    }

    //ingress 表0 50优先级的端口安全控制
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        //符合条件的报文通过
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            //根据IP或者ND的安全配置,确定是否有ingress 表1和2的端口安全设置,优先级80和90
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    //ingress 表1-2的端口安全默认优先级0的操作是允许通过
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    //ingress 表10 100优先级,ARP请求,如果是localnet或者vtep的端口则通过
    HMAP_FOR_EACH (op, key_node, ports) {
        if ((!strcmp(op->nbsp->type, "localnet")) ||
            (!strcmp(op->nbsp->type, "vtep"))) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    //ingress 表10 50优先级 ARP请求,代答已知的IP
    HMAP_FOR_EACH (op, key_node, ports) {
        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                //DHCP客户端通过ARP请求判定是否有IP冲突的时候,会请求自己已有的IP地址
                //这个时候不要代答,所以需要判定ARP请求的IP和发送端的IP一致时,通过
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    //ingress 表10 0优先级,默认通过ARP请求
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    //ingress 表11-12 100优先级,DHCP options和response操作
    HMAP_FOR_EACH (op, key_node, ports) {
        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action, &ipv4_addr_match)) {
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    //修改DHCP报文的option部分,给出分配的IP地址,以及回复信息
                    //然后进入下一级即response
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "%s && udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));

                    //当客户端需要renew一个新的IP的时候,需要match项更改如下
                    //ip4.src = OFFER_IP and ip4.dst = {SERVER_IP, 255.255.255.255}
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);

                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4 && udp.src == 68 && udp.dst == 67"
                        " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
                        op->lsp_addrs[i].ea_s);
                    //如果option已经修改,则修改报文的源目的,变成回复报文
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    ds_destroy(&ipv4_addr_match);
                    break;
                }
            }
        }
    }

    //ingress 表11-12 0优先级,默认通过
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
    }

    //ingress 表13 100优先级,多播广播flood
    HMAP_FOR_EACH (op, key_node, ports) {
        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    //ingress 表13, 50优先级,根据目的MAC查找出口
    //逻辑交换的逻辑端口本来应该没有MAC地址,都是记录的对端的MAC
    HMAP_FOR_EACH (op, key_node, ports) {
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            struct eth_addr mac;
            //将逻辑交换的逻辑端口MAC地址进行格式转换,然后写到match里面
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            //unkown...
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            //动态地址的MAC转换,需要从dynamic_addresses
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            //router的时候,根据peer的MAC地址进行转换
            } else if (!strcmp(op->nbsp->addresses[i], "router")) {
                if (!op->peer || !op->peer->nbrp
                    || !ovs_scan(op->peer->nbrp->mac,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port
                    && op->peer->od->l3redirect_port) {
                    //???是不是端口转发,不负责的猜测
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  op->peer->od->l3redirect_port->json_key);
                }

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                //分布式逻辑路由的NAT规则
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port) {
                    for (int i = 0; i < op->peer->od->nbr->n_nat; i++) {
                        const struct nbrec_nat *nat
                                                  = op->peer->od->nbr->nat[i];
                        if (!strcmp(nat->type, "dnat_and_snat")
                            && nat->logical_port && nat->external_mac
                            && eth_addr_from_string(nat->external_mac, &mac)) {

                            ds_clear(&match);
                            ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
                                          " && is_chassis_resident(\"%s\")",
                                          ETH_ADDR_ARGS(mac),
                                          nat->logical_port);

                            ds_clear(&actions);
                            ds_put_format(&actions, "outport = %s; output;",
                                          op->json_key);
                            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
                                          50, ds_cstr(&match),
                                          ds_cstr(&actions));
                        }
                    }
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    //ingress 表13 0优先级,unkown的到哪里了???
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    //egress 表7 0优先级,IP端口安全默认允许过
    //egress 表8 100优先级,多播广播直接发送了
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    //egress 表8 50优先级,根据配置设置不允许通过的流量
    //150优先级,丢掉没有使能的逻辑端口的流量
    HMAP_FOR_EACH (op, key_node, ports) {
        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        //egress 表7 80 90优先级,根据配置设置不允许通过流量
        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }
}
static void
build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows)
{
    //ingress 表0 100优先级的入口控制
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        //带VLAN标签的报文丢弃
        //源地址多播或者广播的报文丢弃
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
                      "vlan.present || eth.src[40]", "drop;");
    }

    //ingress 表0 50优先级的端口安全控制
    //允许端口配置的MAC和广播MAC的报文传输
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
                      ds_cstr(&match), "next;");

        ds_clear(&match);
        ds_put_format(&match, "eth.dst == %s && inport == %s",
                      op->lrp_networks.ea_s, op->json_key);
        if (op->od->l3dgw_port && op == op->od->l3dgw_port
            && op->od->l3redirect_port) {
            //是不是端口转发???
            ds_put_format(&match, " && is_chassis_resident(%s)",
                          op->od->l3redirect_port->json_key);
        }
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
                      ds_cstr(&match), "next;");
    }

    //ingress 表1 ip input的一些固定设置
    HMAP_FOR_EACH (od, key_node, datapaths) {
        //优先级100,不合规的报文不准通过
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
                      "ip4.mcast || "
                      "ip4.src == 255.255.255.255 || "
                      "ip4.src == 127.0.0.0/8 || "
                      "ip4.dst == 127.0.0.0/8 || "
                      "ip4.src == 0.0.0.0/8 || "
                      "ip4.dst == 0.0.0.0/8",
                      "drop;");

        //90优先级,ARP reply报文,存储在逻辑交换的ARP表,即MAC_Binding
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
                      "put_arp(inport, arp.spa, arp.sha);");

        //50优先级,逻辑路由不能发送广播包,所以丢弃
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
                      "eth.bcast", "drop;");

        //30优先级,ttl消亡的报文丢弃
        ds_clear(&match);
        ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
                      ds_cstr(&match), "drop;");

        //0优先级,放过
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
    }

    //ingress 表1 ip input的设置,根据具体的IP情况
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->lrp_networks.n_ipv4_addrs) {
            //100优先级,源IP是逻辑路由端口IP或者广播IP的丢掉
            ds_clear(&match);
            ds_put_cstr(&match, "ip4.src == ");
            op_put_v4_networks(&match, op, true);
            ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
                          ds_cstr(&match), "drop;");

            //90优先级,对逻辑路由内IP的ICMP echo requests报文进行回复
            ds_clear(&match);
            ds_put_cstr(&match, "ip4.dst == ");
            op_put_v4_networks(&match, op, false);
            ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");

            ds_clear(&actions);
            ds_put_format(&actions,
                "ip4.dst <-> ip4.src; "
                "ip.ttl = 255; "
                "icmp4.type = 0; "
                "flags.loopback = 1; "
                "next; ");
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        //90优先级,针对逻辑路由内IP的ARP requests进行回复
        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            ds_clear(&match);
            ds_put_format(&match,
                          "inport == %s && arp.tpa == %s && arp.op == 1",
                          op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
            if (op->od->l3dgw_port && op == op->od->l3dgw_port
                && op->od->l3redirect_port) {
                ds_put_format(&match, " && is_chassis_resident(%s)",
                              op->od->l3redirect_port->json_key);
            }

            ds_clear(&actions);
            ds_put_format(&actions,
                "eth.dst = eth.src; "
                "eth.src = %s; "
                "arp.op = 2; /* ARP reply */ "
                "arp.tha = arp.sha; "
                "arp.sha = %s; "
                "arp.tpa = arp.spa; "
                "arp.spa = %s; "
                "outport = %s; "
                "flags.loopback = 1; "
                "output;",
                op->lrp_networks.ea_s,
                op->lrp_networks.ea_s,
                op->lrp_networks.ipv4_addrs[i].addr_s,
                op->json_key);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        //LB的VIP也需要ARP responses的报文进行回复,同上的操作
        struct sset all_ips = SSET_INITIALIZER(&all_ips);

        for (int i = 0; i < op->od->nbr->n_load_balancer; i++) {
            struct nbrec_load_balancer *lb = op->od->nbr->load_balancer[i];
            struct smap *vips = &lb->vips;
            struct smap_node *node;

            SMAP_FOR_EACH (node, vips) {
                char *ip_address = NULL;
                uint16_t port;

                ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
                if (!sset_contains(&all_ips, ip_address)) {
                    sset_add(&all_ips, ip_address);
                }

                free(ip_address);
            }
        }

        const char *ip_address;
        SSET_FOR_EACH(ip_address, &all_ips) {
            ds_clear(&match);
            ds_put_format(&match,
                          "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
                          op->json_key, IP_ARGS(ip));

            ds_clear(&actions);
            ds_put_format(&actions,
                "eth.dst = eth.src; "
                "eth.src = %s; "
                "arp.op = 2; /* ARP reply */ "
                "arp.tha = arp.sha; "
                "arp.sha = %s; "
                "arp.tpa = arp.spa; "
                "arp.spa = "IP_FMT"; "
                "outport = %s; "
                "flags.loopback = 1; "
                "output;",
                op->lrp_networks.ea_s,
                op->lrp_networks.ea_s,
                IP_ARGS(ip),
                op->json_key);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }


        /* A gateway router can have 2 SNAT IP addresses to force DNATed and
         * LBed traffic respectively to be SNATed.  In addition, there can be
         * a number of SNAT rules in the NAT table. */
        ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
                                     (op->od->nbr->n_nat + 2));
        size_t n_snat_ips = 0;

        ovs_be32 snat_ip;
        const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
                                                           &snat_ip);
        if (dnat_force_snat_ip) {
            snat_ips[n_snat_ips++] = snat_ip;
        }

        const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
                                                         &snat_ip);
        if (lb_force_snat_ip) {
            snat_ips[n_snat_ips++] = snat_ip;
        }

        for (int i = 0; i < op->od->nbr->n_nat; i++) {
            const struct nbrec_nat *nat;

            nat = op->od->nbr->nat[i];

            ovs_be32 ip;
            if (!ip_parse(nat->external_ip, &ip) || !ip) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
                             "for router %s", nat->external_ip, op->key);
                continue;
            }

            if (!strcmp(nat->type, "snat")) {
                snat_ips[n_snat_ips++] = ip;
                continue;
            }

            //EIP(DNAT)需要的arp处理
            ds_clear(&match);
            ds_put_format(&match,
                          "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
                          op->json_key, IP_ARGS(ip));

            ds_clear(&actions);
            ds_put_format(&actions,
                "eth.dst = eth.src; "
                "arp.op = 2; /* ARP reply */ "
                "arp.tha = arp.sha; ");

            //说实话,这块没懂
            if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
                struct eth_addr mac;
                if (nat->external_mac &&
                    eth_addr_from_string(nat->external_mac, &mac)
                    && nat->logical_port) {
                    //分布式NAT的场景,回复的源MAC是nat->external_mac
                    ds_put_format(&actions,
                        "eth.src = "ETH_ADDR_FMT"; "
                        "arp.sha = "ETH_ADDR_FMT"; ",
                        ETH_ADDR_ARGS(mac),
                        ETH_ADDR_ARGS(mac));
                    ds_put_format(&match, " && is_chassis_resident(\"%s\")",
                                  nat->logical_port);
                } else {
                    //网络节点的NAT场景,回复的源MAC是网关的MAC
                    ds_put_format(&actions,
                        "eth.src = %s; "
                        "arp.sha = %s; ",
                        op->lrp_networks.ea_s,
                        op->lrp_networks.ea_s);
                    if (op->od->l3redirect_port) {
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      op->od->l3redirect_port->json_key);
                    }
                }
            } else {
                ds_put_format(&actions,
                    "eth.src = %s; "
                    "arp.sha = %s; ",
                    op->lrp_networks.ea_s,
                    op->lrp_networks.ea_s);
            }
            ds_put_format(&actions,
                "arp.tpa = arp.spa; "
                "arp.spa = "IP_FMT"; "
                "outport = %s; "
                "flags.loopback = 1; "
                "output;",
                IP_ARGS(ip),
                op->json_key);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        ds_clear(&match);
        ds_put_cstr(&match, "ip4.dst == {");
        bool has_drop_ips = false;
        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            bool snat_ip_is_router_ip = false;
            for (int j = 0; j < n_snat_ips; j++) {
                //到SNAT IP的报文丢弃,因为这是一个虚拟IP,不处理任何报文
                if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
                    snat_ip_is_router_ip = true;
                    break;
                }
            }
            if (snat_ip_is_router_ip) {
                continue;
            }
            ds_put_format(&match, "%s, ",
                          op->lrp_networks.ipv4_addrs[i].addr_s);
            has_drop_ips = true;
        }
        ds_chomp(&match, ' ');
        ds_chomp(&match, ',');
        ds_put_cstr(&match, "}");

        if (has_drop_ips) {
            //到网关router的报文也不处理,之前的ARP和ICMP已经代答
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
                          ds_cstr(&match), "drop;");
        }

        free(snat_ips);
    }

    //处理NAT、分片和LB的操作
    HMAP_FOR_EACH (od, key_node, datapaths) {
        //ingress的表2-4,egress的表0-3的默认流表0优先级是通过
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");

        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port
            && !smap_get(&od->nbr->options, "underlay-gateway")) {
            continue;
        }

        ovs_be32 snat_ip;
        const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
                                                           &snat_ip);
        const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
                                                         &snat_ip);

        for (int i = 0; i < od->nbr->n_nat; i++) {
            const struct nbrec_nat *nat;

            nat = od->nbr->nat[i];

            ovs_be32 ip, mask;

            char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
            if (error || mask != OVS_BE32_MAX) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad external ip %s for nat",
                             nat->external_ip);
                free(error);
                continue;
            }

            //检测NAT的logical_ip是否合理,SNAT的时候,logical_ip可以是一个子网
            error = ip_parse_masked(nat->logical_ip, &ip, &mask);
            if (!strcmp(nat->type, "snat")) {
                if (error) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
                                 "in router "UUID_FMT"",
                                 nat->logical_ip, UUID_ARGS(&od->key));
                    free(error);
                    continue;
                }
            } else {
                if (error || mask != OVS_BE32_MAX) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
                        ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
                    free(error);
                    continue;
                }
            }

            //分布式路由NAT,确定NAT规则是否满足分布式NAT的操作
            bool distributed = false;
            struct eth_addr mac;
            if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
                nat->logical_port && nat->external_mac) {
                if (eth_addr_from_string(nat->external_mac, &mac)) {
                    distributed = true;
                } else {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
                        ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
                    continue;
                }
            }

            //ingress 表3 ,必须是egress的SNAT已经创建了连接以及反向连接。
            if (!strcmp(nat->type, "snat")
                || !strcmp(nat->type, "dnat_and_snat")) {
                if (!od->l3dgw_port) {
                    //网关路由
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
                                  ds_cstr(&match), "ct_snat; next;");
                } else {
                    //分布式路由,网关进来的报文进行unnat
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s"
                                          " && inport == %s",
                                  nat->external_ip,
                                  od->l3dgw_port->json_key);
                    if (!distributed && od->l3redirect_port) {
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      od->l3redirect_port->json_key);
                    }
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
                                  ds_cstr(&match), "ct_snat;");

                    //貌似是没有匹配上面的流表,表示不是网关端口来的
                    //是从其他路由端口来的,需要重定向到l3dgw_port操作NAT
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
                                  ds_cstr(&match),
                                  REGBIT_NAT_REDIRECT" = 1; next;");
                }
            }

            //ingress 表4 DNAT
            int dnat = 0, pro = 100;
            if (!strcmp(nat->type, "dnat")
                || !strcmp(nat->type, "dnat_and_snat")) {
                if (!od->l3dgw_port) {
                    //网关路由
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    if(!strcmp(nat->type, "dnat")){
                        if(nat->eport && nat->protocol){
                            if(!strcmp(nat->protocol, "tcp")) {
                                ds_put_format(&match, " && tcp && tcp.dst == %d", (uint16_t)nat->eport);
                                dnat = 1;
                            } else if (!strcmp(nat->protocol, "udp")) {
                                ds_put_format(&match, " && udp && udp.dst == %d", (uint16_t)nat->eport);
                                dnat = 1;
                            } else { continue; }
                        } else { ;}
                    }
                    ds_clear(&actions);
                    if (dnat_force_snat_ip) {
                        //该标志表示egress的SNAT需要执行
                        ds_put_format(&actions,
                                      "flags.force_snat_for_dnat = 1; ");
                    }
                    if(dnat) {
                        pro = 90;
                        ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s:%d);", nat->logical_ip, (int)(nat->lport?:nat->eport));
                    } else {
                        ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);", nat->logical_ip);
                    }
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, pro,
                                  ds_cstr(&match), ds_cstr(&actions));
                } else {
                    //分布式路由,入端口是l3dgw_port
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s"
                                  " && inport == %s",
                                  nat->external_ip,
                                  od->l3dgw_port->json_key);
                    if (!distributed && od->l3redirect_port) {
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      od->l3redirect_port->json_key);
                    }
                    ds_clear(&actions);
                    ds_put_format(&actions, "ct_dnat(%s);",
                                  nat->logical_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
                                  ds_cstr(&match), ds_cstr(&actions));

                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
                                  ds_cstr(&match),
                                  REGBIT_NAT_REDIRECT" = 1; next;");
                }
            }

            //egress 表0 UNDNAT,必须是ingress的DNAT连接已经创建的情况下
            if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
                || !strcmp(nat->type, "dnat_and_snat"))) {
                ds_clear(&match);
                ds_put_format(&match, "ip && ip4.src == %s"
                                      " && outport == %s",
                              nat->logical_ip,
                              od->l3dgw_port->json_key);
                if (!distributed && od->l3redirect_port) {
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  od->l3redirect_port->json_key);
                }
                ds_clear(&actions);
                if (distributed) {
                    ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
                                  ETH_ADDR_ARGS(mac));
                }
                ds_put_format(&actions, "ct_dnat;");
                ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
                              ds_cstr(&match), ds_cstr(&actions));
            }

            //egress 表1 SNAT
            if (!strcmp(nat->type, "snat")
                || !strcmp(nat->type, "dnat_and_snat")) {
                if (!od->l3dgw_port) {
                    //网关路由
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.src == %s",
                                  nat->logical_ip);
                    ds_clear(&actions);
                    ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);

                    //掩码越长,优先级越高
                    ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
                                  count_1bits(ntohl(mask)) + 1,
                                  ds_cstr(&match), ds_cstr(&actions));
                } else {
                    //分布式路由
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.src == %s"
                                          " && outport == %s",
                                  nat->logical_ip,
                                  od->l3dgw_port->json_key);
                    if (!distributed && od->l3redirect_port) {
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      od->l3redirect_port->json_key);
                    }
                    ds_clear(&actions);
                    if (distributed) {
                        ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
                                      ETH_ADDR_ARGS(mac));
                    }
                    ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);

                    //同样是掩码越长,优先级越高
                    ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
                                  count_1bits(ntohl(mask)) + 1,
                                  ds_cstr(&match), ds_cstr(&actions));
                }
            }

            //ingress 表0,50优先级允许支持NAT的报文通过
            //入口是l3dgw_port,并且目的MAC是nat->external_mac的时候通过
            if (distributed) {
                ds_clear(&match);
                ds_put_format(&match,
                              "eth.dst == "ETH_ADDR_FMT" && inport == %s"
                              " && is_chassis_resident(\"%s\")",
                              ETH_ADDR_ARGS(mac),
                              od->l3dgw_port->json_key,
                              nat->logical_port);
                ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
                              ds_cstr(&match), "next;");
            }

            //ingress 表7,100优先级允许NAT报文通过
            if (distributed) {
                ds_clear(&match);
                ds_put_format(&match, "ip4.src == %s && outport == %s",
                              nat->logical_ip,
                              od->l3dgw_port->json_key);
                ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
                              ds_cstr(&match), "next;");
            }

            //egress 表2,100优先级如目的IP和EIP相等的话,需要报文clone回注到egress的表0
            if (od->l3dgw_port) {
                //分布式路由
                ds_clear(&match);
                ds_put_format(&match, "ip4.dst == %s && outport == %s",
                              nat->external_ip,
                              od->l3dgw_port->json_key);
                ds_clear(&actions);
                ds_put_format(&actions,
                              "clone { ct_clear; "
                              "inport = outport; outport = \"\"; "
                              "flags = 0; flags.loopback = 1; ");
                for (int i = 0; i < MFF_N_LOG_REGS; i++) {
                    ds_put_format(&actions, "reg%d = 0; ", i);
                }
                ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
                              "next(pipeline=ingress, table=0); };");
                ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
                              ds_cstr(&match), ds_cstr(&actions));
            }
        }

        //ingress 表3,110优先级强制SNAT之前DNAT过的报文,网关路由
        if (dnat_force_snat_ip && !od->l3dgw_port) {
            //如果报文的目的IP是网关路由的IP,那么进行UNSNAT
            ds_clear(&match);
            ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
                          ds_cstr(&match), "ct_snat; next;");

            //已经DNAT过得报文走这一条,但是SNAT
            ds_clear(&match);
            ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
            ds_clear(&actions);
            ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
                          ds_cstr(&match), ds_cstr(&actions));
        }
        if (lb_force_snat_ip && !od->l3dgw_port) {
            //同上,报文的目的IP是网关路由IP,进行UNSNAT
            ds_clear(&match);
            ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
                          ds_cstr(&match), "ct_snat; next;");

            //有force_snat_for_lb标志的进行SNAT
            ds_clear(&match);
            ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
            ds_clear(&actions);
            ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        if (!od->l3dgw_port) {
            //网关路由,重新循环每个报文到DNAT区域
            //需要UNDNAT的报文都会进行UNDNAT,理想情况下,可以在egress完成
            //但是由于网关路由器没有任何关于将源IP地址作为IP路由的EIP的特性
            //所以我们可以在此处进行操作,从而省去了以后的一次重新循环
            //任何通过SNAT区域的报文都会自动重新循环
            //来得到openflow pipeline的路由需要的新目的IP
            ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
                              "ip4", "flags.loopback = 1; ct_dnat;");
        } else {
            //分布式路由的NAT,向ingress中没有NAT规则的IP路由表、ARP处理表、网关重定向添加流表
            //ingress的表5 300优先级添加流表
            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
                          REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");

            //ingress的表5 200优先级添加流表,修改目的MAC位分布式网关端口的地址
            ds_clear(&actions);
            ds_put_format(&actions, "eth.dst = %s; next;",
                          od->l3dgw_port->lrp_networks.ea_s);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
                          REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));

            //ingress的表7 200优先级添加流表
            ds_clear(&actions);
            ds_put_format(&actions, "outport = %s; next;",
                          od->l3redirect_port->json_key);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
                          REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
        }

        //处理所有需要重组或者conntrack的IP的设置
        struct sset all_ips = SSET_INITIALIZER(&all_ips);

        for (int i = 0; i < od->nbr->n_load_balancer; i++) {
            struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
            struct smap *vips = &lb->vips;
            struct smap_node *node;

            SMAP_FOR_EACH (node, vips) {
                uint16_t port = 0;

                char *ip_address = NULL;
                ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
                if (!ip_address) {
                    continue;
                }

                if (!sset_contains(&all_ips, ip_address)) {
                    sset_add(&all_ips, ip_address);
                }

                //ingress 表4 DNAT中的LB添加高级的规则,120和110优先级
                //每个匹配项,我们通过add_router_lb_flow()添加两条流表
                //一条流表为ct.new添加操作ct_lb($targets)
                //另一条流表为ct.est添加操作ct_dnat;
                ds_clear(&actions);
                ds_put_format(&actions, "ct_lb(%s);", node->value);

                ds_clear(&match);
                ds_put_format(&match, "ip && ip4.dst == %s",
                              ip_address);
                free(ip_address);

                if (port) {
                    if (lb->protocol && !strcmp(lb->protocol, "udp")) {
                        ds_put_format(&match, " && udp && udp.dst == %d",
                                      port);
                    } else {
                        ds_put_format(&match, " && tcp && tcp.dst == %d",
                                      port);
                    }
                    add_router_lb_flow(lflows, od, &match, &actions, 120,
                                       lb_force_snat_ip);
                } else {
                    add_router_lb_flow(lflows, od, &match, &actions, 110,
                                       lb_force_snat_ip);
                }
            }
        }

        //如果有LB规则,我们应该将报文发送到重组和conntrack中
        //有了conntrack,我们可以只从组里的新连接去设置DNAT IP
        //如果LB规则中有L4的端口,我们需要重组进行匹配L4端口
        const char *ip_address;
        SSET_FOR_EACH(ip_address, &all_ips) {
            ds_clear(&match);
            ds_put_format(&match, "ip && ip4.dst == %s", ip_address);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
                          100, ds_cstr(&match), "ct_next;");
        }

        sset_destroy(&all_ips);
    }

    //ingress 表5 该表设置出口为正确的出口,并且源MAC修改为出口的MAC
    //下一跳的IP从reg0中拿到,并且下一条表处理ARP resolution
    HMAP_FOR_EACH (op, key_node, ports) {
        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
                      op->lrp_networks.ipv4_addrs[i].network_s,
                      op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
        }

        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
            add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
                      op->lrp_networks.ipv6_addrs[i].network_s,
                      op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
        }
    }

    //将静态路由转换为流表
    HMAP_FOR_EACH (od, key_node, datapaths) {
        for (int i = 0; i < od->nbr->n_static_routes; i++) {
            const struct nbrec_logical_router_static_route *route;

            route = od->nbr->static_routes[i];
            build_static_route_flow(lflows, od, ports, route);
        }
    }

    //ingress 表6
    //下一跳的IP地址存储在reg0,该表处理reg0中的IP设置到出口并且将MAC设置到报文目的MAC
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbrp) {
            //当前状况是逻辑路由端口,当下一跳IP(存于reg0中)匹配该路由端口时,
            //流表出口设置为这个逻辑端口,并且将该口的MAC设置为报文的目的MAC
            //报文依然在对端的逻辑pipeline中,所以应当匹配对端的出口
            if (op->peer && op->nbrp->peer) {
                if (op->lrp_networks.n_ipv4_addrs) {
                    ds_clear(&match);
                    ds_put_format(&match, "outport == %s && reg0 == ",
                                  op->peer->json_key);
                    op_put_v4_networks(&match, op, false);

                    ds_clear(&actions);
                    ds_put_format(&actions, "eth.dst = %s; next;",
                                  op->lrp_networks.ea_s);
                    ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
                                  100, ds_cstr(&match), ds_cstr(&actions));
                }
            }
        } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
            //当前状况是逻辑交换的端口,连接了一个VM或者容器
            //解析里面的地址,为每个地址遍历连接到逻辑交换的所有路由端口
            //如果地址从路由端口科大,添加ARP表到路由的pipeline

            for (size_t i = 0; i < op->n_lsp_addrs; i++) {
                const char *ea_s = op->lsp_addrs[i].ea_s;
                for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                    const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
                    for (size_t k = 0; k < op->od->n_router_ports; k++) {
                        const char *peer_name = smap_get(
                            &op->od->router_ports[k]->nbsp->options,
                            "router-port");
                        ds_clear(&match);
                        ds_put_format(&match, "outport == %s && reg0 == %s",
                                      peer->json_key, ip_s);

                        ds_clear(&actions);
                        ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
                        ovn_lflow_add(lflows, peer->od,
                                      S_ROUTER_IN_ARP_RESOLVE, 100,
                                      ds_cstr(&match), ds_cstr(&actions));
                    }
                }

                for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                    const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
                    for (size_t k = 0; k < op->od->n_router_ports; k++) {
                        const char *peer_name = smap_get(
                            &op->od->router_ports[k]->nbsp->options,
                            "router-port");
                        ds_clear(&match);
                        ds_put_format(&match, "outport == %s && xxreg0 == %s",
                                      peer->json_key, ip_s);

                        ds_clear(&actions);
                        ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
                        ovn_lflow_add(lflows, peer->od,
                                      S_ROUTER_IN_ARP_RESOLVE, 100,
                                      ds_cstr(&match), ds_cstr(&actions));
                    }
                }
            }
        } else if (!strcmp(op->nbsp->type, "router")) {
            //当前是连接路由的逻辑交换端口
            //该交换端口的对端是路由端口
            //我们需要添加逻辑流表,能够对所有连接到该交换机的路由端口添加ARP信息
            const char *peer_name = smap_get(&op->nbsp->options,
                                             "router-port");
            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            for (size_t i = 0; i < op->od->n_router_ports; i++) {
                const char *router_port_name = smap_get(
                                    &op->od->router_ports[i]->nbsp->options,
                                    "router-port");
                struct ovn_port *router_port = ovn_port_find(ports,
                                                             router_port_name);
                if (router_port->lrp_networks.n_ipv4_addrs) {
                    ds_clear(&match);
                    ds_put_format(&match, "outport == %s && reg0 == ",
                                  peer->json_key);
                    op_put_v4_networks(&match, router_port, false);

                    ds_clear(&actions);
                    ds_put_format(&actions, "eth.dst = %s; next;",
                                              router_port->lrp_networks.ea_s);
                    ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
                                  100, ds_cstr(&match), ds_cstr(&actions));
                }

                if (router_port->lrp_networks.n_ipv6_addrs) {
                    ds_clear(&match);
                    ds_put_format(&match, "outport == %s && xxreg0 == ",
                                  peer->json_key);
                    op_put_v6_networks(&match, router_port);

                    ds_clear(&actions);
                    ds_put_format(&actions, "eth.dst = %s; next;",
                                  router_port->lrp_networks.ea_s);
                    ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
                                  100, ds_cstr(&match), ds_cstr(&actions));
                }
            }
        }
    }

    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
                      "get_arp(outport, reg0); next;");
    }

    //ingress 表7 分布式路由中,出口和l3dgw_port相等时
    //这个表会将一个子网的流量转发到l3redirect_port
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->l3dgw_port && od->l3redirect_port) {
            //当流量的出口等于l3dgw_port,如果报文不匹配更高级的重定向规则
            //那么流量将重定向到l3dgw_port的中心实例
            ds_clear(&match);
            ds_put_format(&match, "outport == %s",
                          od->l3dgw_port->json_key);
            ds_clear(&actions);
            ds_put_format(&actions, "outport = %s; next;",
                          od->l3redirect_port->json_key);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
                          ds_cstr(&match), ds_cstr(&actions));

            //如果目的MAC已经处理了,则重定向到l3dgw_port的中心实例
            //这些流量在进行重定向到中心实例之前,将被ingress表中的ARP request替代
            ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
            ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        //0优先级默认是转发
        ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
    }

    //ingress 表8 100优先级用来发送ARP Request
    //主要是目的MAC修改为广播、源IP从reg1取,目标IP从reg0取、操作为request,最后发送
    //0优先级表示目的MAC地址可以处理,直接发送
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
                      "eth.dst == 00:00:00:00:00:00",
                      "arp { "
                      "eth.dst = ff:ff:ff:ff:ff:ff; "
                      "arp.spa = reg1; "
                      "arp.tpa = reg0; "
                      "arp.op = 1; " /* ARP request */
                      "output; "
                      "};");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
    }

    //egress 表3 100优先级直接匹配逻辑出口,执行发送
    HMAP_FOR_EACH (op, key_node, ports) {
        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
                      ds_cstr(&match), "output;");
    }
}
最后修改:2021 年 08 月 18 日
如果觉得我的文章对你有用,请随意赞赏