sequenceDiagram
autonumber
participant K as Linux Kernel
box lightyellow bgp容器
participant Z as zebra
participant FPM as fpmsyncd
end
box pink database容器
participant R as Redis
end
box lightblue swss容器
participant OA as orchagent
end
box lightgreen syncd容器
participant SD as syncd
end
participant A as ASIC
K->>FPM: 内核路由变更时通过Netlink发送通知
Z->>FPM: 通过FPM接口和Netlink<br/>消息格式发送路由变更通知
FPM->>R: 通过ProducerStateTable<br/>将路由变更信息写入<br/>APPL_DB
R->>OA: 通过ConsumerStateTable<br/>接收路由变更信息
OA->>OA: 处理路由变更信息<br/>生成SAI路由对象
OA->>SD: 通过ProducerTable<br/>或者ZMQ将SAI路由对象<br/>发给syncd
SD->>R: 接收SAI路由对象,写入ASIC_DB
SD->>A: 通过SAI接口<br/>配置ASIC
while (true) { try { // Launching FPM server and wait for zebra to connect. FpmLink fpm(&sync); ...
fpm.accept(); ... } catch (FpmLink::FpmConnectionClosedException &e) { // If connection is closed, keep retrying until it succeeds, before handling any other events. cout << "Connection lost, reconnecting..." << endl; } ... } }
这样,所有的路由变更消息都会以Netlink的形式发送给RouteSync,其中[EVPN Type 5][EVPN]必须以原始消息的形式进行处理,所以会发送给onMsgRaw,其他的消息都会统一地发给处理Netlink的onMsg回调:(关于Netlink如何接收和处理消息,请移步之前总结的通信机制一篇)
/* Read all netlink messages inside FPM message */ for (; NLMSG_OK (nl_hdr, msg_len); nl_hdr = NLMSG_NEXT(nl_hdr, msg_len)) { /* * EVPN Type5 Add Routes need to be process in Raw mode as they contain * RMAC, VLAN and L3VNI information. * Where as all other route will be using rtnl api to extract information * from the netlink msg. */ bool isRaw = isRawProcessing(nl_hdr); nl_msg *msg = nlmsg_convert(nl_hdr); ... nlmsg_set_proto(msg, NETLINK_ROUTE);
if (isRaw) { /* EVPN Type5 Add route processing */ /* This will call into onRawMsg() */ processRawMsg(nl_hdr); } else { /* This will call into onMsg() */ NetDispatcher::getInstance().onNetlinkMessage(msg); }
struct rtnl_route *route_obj = (struct rtnl_route *)obj; auto family = rtnl_route_get_family(route_obj); if (family == AF_MPLS) { onLabelRouteMsg(nlmsg_type, obj); return; } ...
unsigned int master_index = rtnl_route_get_table(route_obj); char master_name[IFNAMSIZ] = {0}; if (master_index) { /* If the master device name starts with VNET_PREFIX, it is a VNET route. The VNET name is exactly the name of the associated master device. */ getIfName(master_index, master_name, IFNAMSIZ); if (string(master_name).find(VNET_PREFIX) == 0) { onVnetRouteMsg(nlmsg_type, obj, string(master_name)); }
/* Otherwise, it is a regular route (include VRF route). */ else { onRouteMsg(nlmsg_type, obj, master_name); } } else { onRouteMsg(nlmsg_type, obj, NULL); } }
// File: src/sonic-swss/orchagent/routeorch.cpp void RouteOrch::doTask(Consumer& consumer) { // Calling PortOrch to make sure all ports are ready before processing route messages. if (!gPortsOrch->allPortsReady()) { return; }
// Call doLabelTask() instead, if the incoming messages are from MPLS messages. Otherwise, move on as regular routes. ...
/* Default handling is for ROUTE_TABLE (regular routes) */ auto it = consumer.m_toSync.begin(); while (it != consumer.m_toSync.end()) { // Add or remove routes with a route bulker while (it != consumer.m_toSync.end()) { KeyOpFieldsValuesTuple t = it->second;
// Parse route operation from the incoming message here. string key = kfvKey(t); string op = kfvOp(t); ...
// resync application: // - When routeorch receives 'resync' message (key = "resync", op = "SET"), it marks all current routes as dirty // and waits for 'resync complete' message. For all newly received routes, if they match current dirty routes, // it unmarks them dirty. // - After receiving 'resync complete' (key = "resync", op != "SET") message, it creates all newly added routes // and removes all dirty routes. ...
// Parsing VRF and IP prefix from the incoming message here. ...
// Process regular route operations. if (op == SET_COMMAND) { // Parse and validate route attributes from the incoming message here. string ips; string aliases; ...
// If the nexthop_group is empty, create the next hop group key based on the IPs and aliases. // Otherwise, get the key from the NhgOrch. The result will be stored in the "nhg" variable below. NextHopGroupKey& nhg = ctx.nhg; ... if (nhg_index.empty()) { // Here the nexthop_group is empty, so we create the next hop group key based on the IPs and aliases. ...
string nhg_str = ""; if (blackhole) { nhg = NextHopGroupKey(); } else if (srv6_nh == true) { ... nhg = NextHopGroupKey(nhg_str, overlay_nh, srv6_nh); } else if (overlay_nh == false) { ... nhg = NextHopGroupKey(nhg_str, weights); } else { ... nhg = NextHopGroupKey(nhg_str, overlay_nh, srv6_nh); } } else { // Here we have a nexthop_group, so we get the key from the NhgOrch. const NhgBase& nh_group = getNhg(nhg_index); nhg = nh_group.getNhgKey(); ... } ...
// Now we start to create the SAI route entry. if (nhg.getSize() == 1 && nhg.hasIntfNextHop()) { // Skip certain routes, such as not valid, directly routes to tun0, linklocal or multicast routes, etc. ...
// Create SAI route entry in addRoute function. if (addRoute(ctx, nhg)) it = consumer.m_toSync.erase(it); else it++; }
/* * Check if the route does not exist or needs to be updated or * if the route is using a temporary next hop group owned by * NhgOrch. */ else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || m_syncdRoutes.at(vrf_id).at(ip_prefix) != RouteNhg(nhg, ctx.nhg_index) || gRouteBulker.bulk_entry_pending_removal(route_entry) || ctx.using_temp_nhg) { if (addRoute(ctx, nhg)) it = consumer.m_toSync.erase(it); else it++; } ... } // Handle other ops, like DEL_COMMAND for route deletion, etc. ... }
// Flush the route bulker, so routes will be written to syncd and ASIC gRouteBulker.flush();
// Go through the bulker results. // Handle SAI failures, update neighbors, counters, send notifications in add/removeRoutePost functions. ...
/* Remove next hop group if the reference count decreases to zero */ ... } }
// File: src/sonic-swss/orchagent/routeorch.cpp bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) { // Get nexthop information from NeighOrch. // We also need to check PortOrch for inband port, IntfsOrch to ensure the related interface is created and etc. ... // Start to sync the SAI route entry. sai_route_entry_t route_entry; route_entry.vr_id = vrf_id; route_entry.switch_id = gSwitchId; copy(route_entry.destination, ipPrefix);
sai_attribute_t route_attr; auto& object_statuses = ctx.object_statuses; // Create a new route entry in this case. // // In case the entry is already pending removal in the bulk, it would be removed from m_syncdRoutes during the bulk call. // Therefore, such entries need to be re-created rather than set attribute. if (it_route == m_syncdRoutes.at(vrf_id).end() || gRouteBulker.bulk_entry_pending_removal(route_entry)) { if (blackhole) { route_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; route_attr.value.s32 = SAI_PACKET_ACTION_DROP; } else { route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; route_attr.value.oid = next_hop_id; }
/* Default SAI_ROUTE_ATTR_PACKET_ACTION is SAI_PACKET_ACTION_FORWARD */ object_statuses.emplace_back(); sai_status_t status = gRouteBulker.create_entry(&object_statuses.back(), &route_entry, 1, &route_attr); if (status == SAI_STATUS_ITEM_ALREADY_EXISTS) { return false; } } // Update existing route entry in this case. else { // Set the packet action to forward when there was no next hop (dropped) and not pointing to blackhole. if (it_route->second.nhg_key.getSize() == 0 && !blackhole) { route_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; route_attr.value.s32 = SAI_PACKET_ACTION_FORWARD;
// File: src/sonic-swss/orchagent/bulker.h template <typename T> class EntityBulker { public: using Ts = SaiBulkerTraits<T>; using Te = typename Ts::entry_t; ...
void flush() { // Bulk remove entries if (!removing_entries.empty()) { // Split into batches of max_bulk_size, then call flush. Similar to creating_entries, so details are omitted. std::vector<Te> rs; ... flush_removing_entries(rs); removing_entries.clear(); }
// Bulk create entries if (!creating_entries.empty()) { // Split into batches of max_bulk_size, then call flush_creating_entries to call SAI batch create API to create // the objects in batch. std::vector<Te> rs; std::vector<sai_attribute_t const*> tss; std::vector<uint32_t> cs; for (auto const& i: creating_entries) { sai_object_id_t *pid = std::get<0>(i); auto const& attrs = std::get<1>(i); if (*pid == SAI_NULL_OBJECT_ID) { rs.push_back(pid); tss.push_back(attrs.data()); cs.push_back((uint32_t)attrs.size());
// Bulk update existing entries if (!setting_entries.empty()) { // Split into batches of max_bulk_size, then call flush. Similar to creating_entries, so details are omitted. std::vector<Te> rs; std::vector<sai_attribute_t> ts; std::vector<sai_status_t*> status_vector; ... flush_setting_entries(rs, ts, status_vector); setting_entries.clear(); } }
// Call SAI bulk create API size_t count = rs.size(); std::vector<sai_status_t> statuses(count); sai_status_t status = (*create_entries)((uint32_t)count, rs.data(), cs.data(), tss.data() , SAI_BULK_OP_ERROR_MODE_IGNORE_ERROR, statuses.data());
// Set results back to input entries and clean up the batch below. for (size_t ir = 0; ir < count; ir++) { auto& entry = rs[ir]; sai_status_t *object_status = creating_entries[entry].second; if (object_status) { *object_status = statuses[ir]; } }
// Server is responsible for generate new OID but for that we need switch ID // to be sent to server as well, so instead of sending empty oids we will // send switch IDs for (uint32_t idx = 0; idx < object_count; idx++) { serialized_object_ids.emplace_back(sai_serialize_object_id(switch_id)); } auto status = bulkCreate(object_type, serialized_object_ids, attr_count, attr_list, mode, object_statuses);
// Since user requested create, OID value was created remotely and it was returned in m_lastCreateOids for (uint32_t idx = 0; idx < object_count; idx++) { if (object_statuses[idx] == SAI_STATUS_SUCCESS) { object_id[idx] = m_lastCreateOids.at(idx); } else { object_id[idx] = SAI_NULL_OBJECT_ID; } }