diff --git a/vpr/src/route/ParallelNetlistRouter.h b/vpr/src/route/ParallelNetlistRouter.h index e60a7bd14ab..e562da15627 100644 --- a/vpr/src/route/ParallelNetlistRouter.h +++ b/vpr/src/route/ParallelNetlistRouter.h @@ -1,12 +1,23 @@ #pragma once -/** @file Parallel case for NetlistRouter. Builds a PartitionTree from the - * netlist according to net bounding boxes and goes over it with a tbb::task_group. - * [!] Is not expected to use more than 2 effective threads on avg. */ +/** @file Parallel case for NetlistRouter. Builds a \ref PartitionTree from the + * netlist according to net bounding boxes. Tree nodes are then routed in parallel + * using tbb::task_group. Each task routes the nets inside a node serially and then adds + * its child nodes to the task queue. This approach is serially equivalent & deterministic, + * but it can reduce QoR in congested cases [0]. + * + * Note that the parallel router does not support graphical router breakpoints. + * + * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */ #include "netlist_routers.h" #include +/** Parallel impl for NetlistRouter. + * Holds enough context members to glue together ConnectionRouter and net routing functions, + * such as \ref route_net. Keeps the members in thread-local storage where needed, + * i.e. ConnectionRouters and RouteIterResults-es. + * See \ref route_net. */ template class ParallelNetlistRouter : public NetlistRouter { public: @@ -37,12 +48,17 @@ class ParallelNetlistRouter : public NetlistRouter { , _is_flat(is_flat) {} ~ParallelNetlistRouter() {} + /** Run a single iteration of netlist routing for this->_net_list. This usually means calling + * \ref route_net for each net, which will handle other global updates. + * \return RouteIterResults for this iteration. 
*/ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); void set_rcv_enabled(bool x); void set_timing_info(std::shared_ptr timing_info); private: + /** A single task to route nets inside a PartitionTree node and add tasks for its child nodes to task group \p g. */ void route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack); + ConnectionRouter _make_router(const RouterLookahead* router_lookahead, bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -57,6 +73,7 @@ class ParallelNetlistRouter : public NetlistRouter { route_ctx.rr_node_route_inf, is_flat); } + /* Context fields */ tbb::enumerable_thread_specific> _routers_th; const Netlist<>& _net_list; diff --git a/vpr/src/route/ParallelNetlistRouter.tpp b/vpr/src/route/ParallelNetlistRouter.tpp index ba1a9a4dfe2..3c73b784b6e 100644 --- a/vpr/src/route/ParallelNetlistRouter.tpp +++ b/vpr/src/route/ParallelNetlistRouter.tpp @@ -6,20 +6,23 @@ #include "route_net.h" #include "vtr_time.h" -/** A parallel netlist router. Builds a PartitionTree from the netlist depending on bounding boxes - * and goes over it with a tbb::task_group. Is not expected to use more than 2 effective threads on avg. */ template inline RouteIterResults ParallelNetlistRouter::route_netlist(int itry, float pres_fac, float worst_neg_slack) { + /* Reset results for each thread */ for (auto& results : _results_th) { results = RouteIterResults(); } + /* Organize netlist into a PartitionTree. + * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ PartitionTree tree(_net_list); + /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. 
*/ tbb::task_group g; route_partition_tree_node(g, tree.root(), itry, pres_fac, worst_neg_slack); g.wait(); + /* Combine results from threads */ RouteIterResults out; for (auto& results : _results_th) { out.stats.combine(results.stats); @@ -40,7 +43,7 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& vtr::Timer t; for (auto net_id : node.nets) { - auto flags = try_timing_driven_route_net( + auto flags = route_net( _routers_th.local(), _net_list, net_id, @@ -59,11 +62,13 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& _choking_spots[net_id], _is_flat); - if (!flags.success && !flags.retry_with_full_bb) { /* Disconnected RRG */ + if (!flags.success && !flags.retry_with_full_bb) { + /* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */ _results_th.local().is_routable = false; return; } - if (flags.retry_with_full_bb) { /* Grow the BB and leave it to the next iteration */ + if (flags.retry_with_full_bb) { + /* ConnectionRouter thinks we should grow the BB. 
Do that and leave this net unrouted for now */ route_ctx.route_bb[net_id] = full_device_bb(); continue; } diff --git a/vpr/src/route/SerialNetlistRouter.h b/vpr/src/route/SerialNetlistRouter.h index 7d62bad7744..5bb59df1998 100644 --- a/vpr/src/route/SerialNetlistRouter.h +++ b/vpr/src/route/SerialNetlistRouter.h @@ -1,6 +1,6 @@ #pragma once -/** @file Serial case for NetlistRouter: just loop through nets */ +/** @file Serial case for \ref NetlistRouter: just loop through nets */ #include "netlist_routers.h" diff --git a/vpr/src/route/SerialNetlistRouter.tpp b/vpr/src/route/SerialNetlistRouter.tpp index b11b1c0a632..714426a1920 100644 --- a/vpr/src/route/SerialNetlistRouter.tpp +++ b/vpr/src/route/SerialNetlistRouter.tpp @@ -16,9 +16,9 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); }); - for (size_t i = 0; i < sorted_nets.size(); i++) { - ParentNetId net_id = sorted_nets[i]; - NetResultFlags flags = try_timing_driven_route_net( + for (size_t inet = 0; inet < sorted_nets.size(); inet++) { + ParentNetId net_id = sorted_nets[inet]; + NetResultFlags flags = route_net( _router, _net_list, net_id, @@ -37,14 +37,16 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f _choking_spots[net_id], _is_flat); - if (!flags.success && !flags.retry_with_full_bb) { /* Disconnected RRG */ + if (!flags.success && !flags.retry_with_full_bb) { + /* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */ out.is_routable = false; return out; } - if (flags.retry_with_full_bb) { /* Grow the BB and retry this net */ + if (flags.retry_with_full_bb) { + /* Grow the BB and retry this net right away. 
*/ route_ctx.route_bb[net_id] = full_device_bb(); - i--; + inet--; continue; } diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h index 8ba397562f3..db51ff01b23 100644 --- a/vpr/src/route/netlist_routers.h +++ b/vpr/src/route/netlist_routers.h @@ -43,12 +43,23 @@ struct RouteIterResults { RouterStats stats; }; -/** Route a given netlist. Takes a big context and passes it around to net & sink routing fns. */ +/** Route a given netlist. Takes a big context and passes it around to net & sink routing fns. + * route_netlist only needs to call the functions in route_net.h/tpp: they handle the global + * bookkeeping. */ class NetlistRouter { public: virtual ~NetlistRouter() {} + + /** Run a single iteration of netlist routing for this->_net_list. This usually means calling + * route_net for each net, which will handle other global updates. + * \return RouteIterResults for this iteration. */ virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0; + + /** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/ virtual void set_rcv_enabled(bool x) = 0; + + /** Set this NetlistRouter's timing_info ptr. We sometimes change timing_info + * throughout iterations, but not frequently enough to make it a public member. */ virtual void set_timing_info(std::shared_ptr timing_info) = 0; }; diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index cbb2fcefa3f..469a0455006 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -12,14 +12,6 @@ #include "route_utils.h" #include "vtr_time.h" -/** Attempts a routing via the AIR algorithm. \p width_fac - * specifies the relative width of the channels, while the members of - * \p router_opts determine the value of the costs assigned to routing - * resource node, etc. 
\p det_routing_arch describes the detailed routing - * architecture (connection and switch boxes) of the FPGA; it is used - * only if a DETAILED routing has been selected. - * - * \return Success status. */ bool route(const Netlist<>& net_list, int width_fac, const t_router_opts& router_opts, @@ -163,7 +155,7 @@ bool route(const Netlist<>& net_list, VTR_ASSERT(router_lookahead != nullptr); /* Routing parameters */ - float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */ + float pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */ int bb_fac = router_opts.bb_factor; /* When routing conflicts are detected the bounding boxes are scaled @@ -365,7 +357,7 @@ bool route(const Netlist<>& net_list, //Decrease pres_fac so that critical connections will take more direct routes //Note that we use first_iter_pres_fac here (typically zero), and switch to //use initial_pres_fac on the next iteration. - pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); + pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac); //Reduce timing tolerances to re-route more delay-suboptimal signals connections_inf.set_connection_criticality_tolerance(0.7); @@ -382,7 +374,7 @@ bool route(const Netlist<>& net_list, //after the first routing convergence. Since that is often zero, //we want to set pres_fac to a reasonable (i.e. 
typically non-zero) //value afterwards -- so it grows when multiplied by pres_fac_mult - pres_fac = update_pres_fac(router_opts.initial_pres_fac); + pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac); } //Have we converged the maximum number of times, did not make any changes, or does it seem @@ -445,12 +437,12 @@ bool route(const Netlist<>& net_list, //Update pres_fac if (itry == 1) { - pres_fac = update_pres_fac(router_opts.initial_pres_fac); + pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac); } else { pres_fac *= router_opts.pres_fac_mult; /* Avoid overflow for high iteration counts, even if acc_cost is big */ - pres_fac = update_pres_fac(std::min(pres_fac, static_cast(HUGE_POSITIVE_FLOAT / 1e5))); + pres_fac = update_draw_pres_fac(std::min(pres_fac, static_cast(HUGE_POSITIVE_FLOAT / 1e5))); // Increase short path criticality if it's having a hard time resolving hold violations due to congestion if (budgeting_inf.if_set()) { diff --git a/vpr/src/route/route.h b/vpr/src/route/route.h index 0a62d6b2371..cf6efb26311 100644 --- a/vpr/src/route/route.h +++ b/vpr/src/route/route.h @@ -6,13 +6,16 @@ #include "vpr_types.h" #include "netlist.h" -/** Attempts a routing via the AIR algorithm. \p width_fac - * specifies the relative width of the channels, while the members of +/** Attempts a routing via the AIR algorithm [0]. + * + * \p width_fac specifies the relative width of the channels, while the members of * \p router_opts determine the value of the costs assigned to routing * resource node, etc. \p det_routing_arch describes the detailed routing * architecture (connection and switch boxes) of the FPGA; it is used * only if a DETAILED routing has been selected. * + * [0]: K. E. Murray, S. Zhong, and V. Betz, "AIR: A fast but lazy timing-driven FPGA router", in ASPDAC 2020 + * * \return Success status. 
*/ bool route(const Netlist<>& net_list, int width_fac, diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index fd0ddc6f24d..733b880a1a4 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -118,9 +118,6 @@ void get_serial_num(const Netlist<>& net_list) { VTR_LOG("Serial number (magic cookie) for the routing is: %d\n", serial_num); } -/** This routine checks to see if this is a resource-feasible routing. - * That is, are all rr_node capacity limitations respected? It assumes - * that the occupancy arrays are up to date when it is called. */ bool feasible_routing() { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index 3cc6ba3f72e..203e2880059 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -1,7 +1,7 @@ #pragma once -/** @file More router utils: some used by the connection router, some by other - * router files and some used globally */ +/** @file Misc. router utils: some used by the connection router, some by other + * router files and some used globally. */ #include #include "clustered_netlist.h" @@ -9,6 +9,9 @@ #include "router_stats.h" #include "globals.h" +/** This routine checks to see if this is a resource-feasible routing. + * That is, are all rr_node capacity limitations respected? It assumes + * that the occupancy arrays are up to date when it is called. 
*/ bool feasible_routing(); vtr::vector load_route_bb(const Netlist<>& net_list, diff --git a/vpr/src/route/route_net.cpp b/vpr/src/route/route_net.cpp index 19849d66bcd..5e60a6f9964 100644 --- a/vpr/src/route/route_net.cpp +++ b/vpr/src/route/route_net.cpp @@ -3,8 +3,6 @@ #include "route_net.h" #include "stats.h" -/** When RCV is enabled, it's necessary to be able to completely ripup high fanout nets if there is still negative hold slack - * Normally the router will prune the illegal branches of high fanout nets, this will bypass this */ bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) { if (router_opts.routing_budgets_algorithm != YOYO) { return false; @@ -14,10 +12,6 @@ bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) { return false; } -/** Build and return a partial route tree from the legal connections from last iteration. - * along the way do: - * update pathfinder costs to be accurate to the partial route tree - * mark the rr_node sinks as targets to be reached. 
*/ void setup_routing_resources(int itry, ParentNetId net_id, const Netlist<>& net_list, @@ -110,7 +104,6 @@ void setup_routing_resources(int itry, // completed constructing the partial route tree and updated all other data structures to match } -/** Change the base costs of rr_nodes according to # of fanouts */ void update_rr_base_costs(int fanout) { auto& device_ctx = g_vpr_ctx.mutable_device(); @@ -143,7 +136,6 @@ void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) { } } -/** Detect if net should be routed or not */ bool should_route_net(ParentNetId net_id, CBRR& connections_inf, bool if_force_reroute) { @@ -268,13 +260,6 @@ WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t avail return WirelengthInfo(available_wirelength, used_wirelength); } -/** Returns true if the specified net fanout is classified as high fanout */ -bool is_high_fanout(int fanout, int fanout_threshold) { - if (fanout_threshold < 0 || fanout < fanout_threshold) return false; - return true; -} - -/** Returns the bounding box of a net's used routing resources */ t_bb calc_current_bb(const RouteTree& tree) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -303,29 +288,6 @@ t_bb calc_current_bb(const RouteTree& tree) { return bb; } -/** Give-up on reconvergent routing if the CPD improvement after the - * first iteration since convergence is small, compared to the best - * CPD seen so far */ -bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, - int itry_since_last_convergence, - std::shared_ptr timing_info, - const RoutingMetrics& best_routing_metrics) { - if (itry_since_last_convergence == 1) { - float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS; - - // Give up if we see less than a 1% CPD improvement, - // after reducing pres_fac. Typically larger initial - // improvements are needed to see an actual improvement - // in final legal routing quality. 
- if (cpd_ratio >= router_opts.reconvergence_cpd_threshold) { - VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio); - return true; // Potential CPD improvement is small, don't spend run-time trying to improve it - } - } - - return false; // Don't give up -} - // Initializes net_delay based on best-case delay estimates from the router lookahead void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, const Netlist<>& net_list, @@ -358,8 +320,6 @@ void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, } } -/** Goes through all the sinks of this net and copies their delay values from - * the route_tree to the net_delay array. */ void update_net_delays_from_route_tree(float* net_delay, const Netlist<>& net_list, ParentNetId inet, diff --git a/vpr/src/route/route_net.h b/vpr/src/route/route_net.h index bc81581f41d..e13563ce356 100644 --- a/vpr/src/route/route_net.h +++ b/vpr/src/route/route_net.h @@ -1,5 +1,7 @@ #pragma once +/** @file Net and sink routing functions, and other utils used by them. */ + #include #include @@ -32,18 +34,18 @@ struct NetResultFlags { bool retry_with_full_bb = false; }; +/** When RCV is enabled, it's necessary to be able to completely ripup high fanout nets + * if there is still negative hold slack. 
Normally the router will prune the illegal branches + * of high fanout nets; this will bypass that pruning. */ bool check_hold(const t_router_opts& router_opts, float worst_neg_slack); -size_t dynamic_update_bounding_boxes(const std::vector& updated_nets, - const Netlist<>& net_list, - int high_fanout_threshold); - /** Return a full-device bounding box */ inline t_bb full_device_bb(void) { const auto& grid = g_vpr_ctx.device().grid; return {0, (int)grid.width() - 1, 0, (int)grid.height() - 1}; } +/** Get criticality of \p pin_id in net \p net_id from 0 to 1 */ float get_net_pin_criticality(const SetupHoldTimingInfo* timing_info, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, float max_criticality, @@ -52,8 +54,18 @@ float get_net_pin_criticality(const SetupHoldTimingInfo* timing_info, ParentPinId pin_id, bool is_flat); -bool is_high_fanout(int fanout, int fanout_threshold); +/** Returns true if the specified net fanout is classified as high fanout */ +constexpr bool is_high_fanout(int fanout, int fanout_threshold) { + if (fanout_threshold < 0 || fanout < fanout_threshold) + return false; + return true; +} +/** Build a partial route tree in global context for \p net_id from the legal + * connections from last iteration. + * Along the way do: + * - update pathfinder costs to be accurate to the partial route tree + * - mark the rr_node sinks as targets to be reached. */ void setup_routing_resources(int itry, ParentNetId net_id, const Netlist<>& net_list, @@ -88,12 +100,16 @@ inline void update_net_delay_from_isink(float* net_delay, net_delay[isink] = new_delay; } +/** Goes through all the sinks of this net and copies their delay values from + * the route_tree to the net_delay array. */ void update_net_delays_from_route_tree(float* net_delay, const Netlist<>& net_list, ParentNetId inet, TimingInfo* timing_info, NetPinTimingInvalidator* pin_timing_invalidator); +/** Change the base costs of rr_nodes globally according to # of fanouts + * TODO: is this even thread safe? 
*/ void update_rr_base_costs(int fanout); /** Traverses down a route tree and updates rr_node_inf for all nodes diff --git a/vpr/src/route/route_net.tpp b/vpr/src/route/route_net.tpp index fe7def07ffb..8542d8f306d 100644 --- a/vpr/src/route/route_net.tpp +++ b/vpr/src/route/route_net.tpp @@ -13,77 +13,6 @@ #include "route_profiling.h" #include "rr_graph_fwd.h" -template -inline NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - const ParentNetId& net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - SetupHoldTimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_negative_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - NetResultFlags flags; - - bool reroute_for_hold = false; - if (budgeting_inf.if_set()) { - reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); - reroute_for_hold &= worst_negative_slack != 0; - } - - if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ - flags.success = true; - } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. 
*/ - flags.success = true; - } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { - flags.success = true; - } else { - // track time spent vs fanout - profiling::net_fanout_start(); - - flags = timing_driven_route_net(router, - net_list, - net_id, - itry, - pres_fac, - router_opts, - connections_inf, - router_stats, - net_delay[net_id].data(), - netlist_pin_lookup, - timing_info, - pin_timing_invalidator, - budgeting_inf, - worst_negative_slack, - routing_predictor, - choking_spots, - is_flat); - - profiling::net_fanout_end(net_list.net_sinks(net_id).size()); - - /* Impossible to route? (disconnected rr_graph) */ - if (flags.success) { - route_ctx.net_status.set_is_routed(net_id, true); - } else { - VTR_LOG("Routing failed for net %d\n", net_id); - } - - flags.was_rerouted = true; // Flag to record whether routing was actually changed - } - - return flags; -} - /** Attempt to route a single net. * * @param router The ConnectionRouter instance @@ -105,35 +34,57 @@ inline NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, * @param routing_predictor * @param choking_spots * @param is_flat - * @return NetResultFlags for this net. 
success = false means the RR graph is disconnected and the caller can give up */ + * @return NetResultFlags for this net */ template -inline NetResultFlags timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - float* net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - SetupHoldTimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_neg_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { +inline NetResultFlags route_net(ConnectionRouter& router, + const Netlist<>& net_list, + const ParentNetId& net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + NetPinsMatrix& net_delays, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + SetupHoldTimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_negative_slack, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + NetResultFlags flags; + + bool reroute_for_hold = false; + if (budgeting_inf.if_set()) { + reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); + reroute_for_hold &= worst_negative_slack != 0; + } + + flags.success = true; + + if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ + return flags; + } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. 
*/ + return flags; + } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { + return flags; + } + + // track time spent vs fanout + profiling::net_fanout_start(); + + flags.was_rerouted = true; // Flag to record whether routing was actually changed + auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - auto& route_ctx = g_vpr_ctx.mutable_routing(); unsigned int num_sinks = net_list.net_sinks(net_id).size(); VTR_LOGV_DEBUG(f_router_debug, "Routing Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); - NetResultFlags flags; - setup_routing_resources( itry, net_id, @@ -142,7 +93,7 @@ inline NetResultFlags timing_driven_route_net(ConnectionRouter& router, router_opts.min_incremental_reroute_fanout, connections_inf, router_opts, - check_hold(router_opts, worst_neg_slack)); + check_hold(router_opts, worst_negative_slack)); VTR_ASSERT(route_ctx.route_trees[net_id]); RouteTree& tree = route_ctx.route_trees[net_id].value(); @@ -204,16 +155,16 @@ inline NetResultFlags timing_driven_route_net(ConnectionRouter& router, // delay by selecting a direct route from the clock source to the virtual sink cost_params.criticality = router_opts.max_criticality; - return timing_driven_pre_route_to_clock_root(router, - net_id, - net_list, - sink_node, - cost_params, - router_opts.high_fanout_threshold, - tree, - spatial_route_tree_lookup, - router_stats, - is_flat); + return pre_route_to_clock_root(router, + net_id, + net_list, + sink_node, + cost_params, + router_opts.high_fanout_threshold, + tree, + spatial_route_tree_lookup, + router_stats, + is_flat); } if (budgeting_inf.if_set()) { @@ -241,25 +192,26 @@ inline NetResultFlags timing_driven_route_net(ConnectionRouter& router, profiling::conn_start(); // build a branch in the route tree to the target - auto sink_flags = timing_driven_route_sink(router, - net_list, - net_id, - itarget, - target_pin, - cost_params, - router_opts, - tree, - spatial_route_tree_lookup, - 
router_stats, - budgeting_inf, - routing_predictor, - choking_spots, - is_flat); + auto sink_flags = route_sink(router, + net_list, + net_id, + itarget, + target_pin, + cost_params, + router_opts, + tree, + spatial_route_tree_lookup, + router_stats, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); flags.retry_with_full_bb |= sink_flags.retry_with_full_bb; if (!sink_flags.success) { flags.success = false; + VTR_LOG("Routing failed for net %d\n", net_id); return flags; } @@ -275,6 +227,8 @@ inline NetResultFlags timing_driven_route_net(ConnectionRouter& router, /* For later timing analysis. */ + float* net_delay = net_delays[net_id].data(); + // may have to update timing delay of the previously legally reached sinks since downstream capacitance could be changed update_net_delays_from_route_tree(net_delay, net_list, @@ -289,25 +243,29 @@ inline NetResultFlags timing_driven_route_net(ConnectionRouter& router, } VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); - VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); + router.empty_rcv_route_tree_set(); // ? - flags.success = true; + profiling::net_fanout_end(net_list.net_sinks(net_id).size()); + + route_ctx.net_status.set_is_routed(net_id, true); return flags; } +/** Route to a "virtual sink" in the netlist which corresponds to the start point + * of the global clock network. 
*/ template -inline NetResultFlags timing_driven_pre_route_to_clock_root(ConnectionRouter& router, - ParentNetId net_id, - const Netlist<>& net_list, - RRNodeId sink_node, - const t_conn_cost_params cost_params, - int high_fanout_threshold, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - bool is_flat) { +inline NetResultFlags pre_route_to_clock_root(ConnectionRouter& router, + ParentNetId net_id, + const Netlist<>& net_list, + RRNodeId sink_node, + const t_conn_cost_params cost_params, + int high_fanout_threshold, + RouteTree& tree, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + bool is_flat) { const auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& m_route_ctx = g_vpr_ctx.mutable_routing(); @@ -338,7 +296,7 @@ inline NetResultFlags timing_driven_pre_route_to_clock_root(ConnectionRouter& ro router_stats, conn_params); - // TODO: Parts of the rest of this function are repetitive to code in timing_driven_route_sink. Should refactor. + // TODO: Parts of the rest of this function are repetitive to code in route_sink. Should refactor. 
if (!found_path) { ParentBlockId src_block = net_list.net_driver_block(net_id); VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n", @@ -408,22 +366,22 @@ inline NetResultFlags timing_driven_pre_route_to_clock_root(ConnectionRouter& ro * @param routing_predictor * @param choking_spots * @param is_flat - * @return NetResultFlags for this sink to be bubbled up through timing_driven_route_net */ + * @return NetResultFlags for this sink to be bubbled up through route_net */ template -inline NetResultFlags timing_driven_route_sink(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - unsigned itarget, - int target_pin, - const t_conn_cost_params cost_params, - const t_router_opts& router_opts, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - route_budgets& budgeting_inf, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { +inline NetResultFlags route_sink(ConnectionRouter& router, + const Netlist<>& net_list, + ParentNetId net_id, + unsigned itarget, + int target_pin, + const t_conn_cost_params cost_params, + const t_router_opts& router_opts, + RouteTree& tree, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { const auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); diff --git a/vpr/src/route/route_utils.cpp b/vpr/src/route/route_utils.cpp index 03e473bb7b4..f90789e5250 100644 --- a/vpr/src/route/route_utils.cpp +++ b/vpr/src/route/route_utils.cpp @@ -143,6 +143,26 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net return num_bb_updated; } +bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, + int itry_since_last_convergence, + std::shared_ptr timing_info, + const RoutingMetrics& 
best_routing_metrics) { + if (itry_since_last_convergence == 1) { + float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS; + + // Give up if we see less than a 1% CPD improvement, + // after reducing pres_fac. Typically larger initial + // improvements are needed to see an actual improvement + // in final legal routing quality. + if (cpd_ratio >= router_opts.reconvergence_cpd_threshold) { + VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio); + return true; // Potential CPD improvement is small, don't spend run-time trying to improve it + } + } + + return false; // Don't give up +} + bool is_better_quality_routing(const vtr::vector>& best_routing, const RoutingMetrics& best_routing_metrics, const WirelengthInfo& wirelength_info, @@ -183,11 +203,6 @@ bool is_better_quality_routing(const vtr::vector timing_info, bool rcv_finished) { - //This function checks if a routing iteration has completed. - //When VPR is run normally, we check if routing_budgets_algorithm is disabled, and if the routing is legal - //With the introduction of yoyo budgeting algorithm, we must check if there are no hold violations - //in addition to routing being legal and the correct budgeting algorithm being set. - if (routing_is_feasible) { if (router_opts.routing_budgets_algorithm != YOYO) { return true; @@ -351,10 +366,6 @@ void print_router_criticality_histogram(const Netlist<>& net_list, print_histogram(create_criticality_histogram(net_list, timing_info, netlist_pin_lookup, is_flat, 10)); } -// If a route is ripped up during routing, non-configurable sets are left -// behind. As a result, the final routing may have stubs at -// non-configurable sets. This function tracks non-configurable set usage, -// and if the sets are unused, prunes them. 
void prune_unused_non_configurable_nets(CBRR& connections_inf, const Netlist<>& net_list) { auto& device_ctx = g_vpr_ctx.device(); @@ -496,10 +507,7 @@ void try_graph(int width_fac, is_flat); } -/* This routine should take the new value of the present congestion factor - * and propagate it to all the relevant data fields in the vpr flow. - * Currently, it only updates the pres_fac used by the drawing functions */ -float update_pres_fac(float new_pres_fac) { +float update_draw_pres_fac(float new_pres_fac) { #ifndef NO_GRAPHICS // Only updates the drawing pres_fac if graphics is enabled diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h index 91890a78f1e..8b86f230290 100644 --- a/vpr/src/route/route_utils.h +++ b/vpr/src/route/route_utils.h @@ -1,6 +1,6 @@ #pragma once -/** @file Utility fns for top-level router. */ +/** @file Utility functions used in the top-level router (route.cpp). */ #include "router_stats.h" #include "timing_info.h" @@ -67,6 +67,7 @@ void generate_route_timing_reports(const t_router_opts& router_opts, const RoutingDelayCalculator& delay_calc, bool is_flat); +/** Get the maximum number of pins used in the netlist (used to allocate things) */ int get_max_pins_per_net(const Netlist<>& net_list); /** Initialize net_delay based on best-case delay estimates from the router lookahead. */ @@ -82,6 +83,10 @@ bool is_better_quality_routing(const vtr::vector timing_info); +/** This function checks if a routing iteration has completed. + * When VPR is run normally, we check if routing_budgets_algorithm is disabled, and if the routing is legal + * With the introduction of yoyo budgeting algorithm, we must check if there are no hold violations + * in addition to routing being legal and the correct budgeting algorithm being set. 
 */ bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished); void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info); @@ -95,7 +100,11 @@ void print_router_criticality_histogram(const Netlist<>& net_list, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, bool is_flat); -/** Prune stubs of non-config nodes from route_trees */ +/** Prune stubs of non-config nodes from route_ctx.route_trees. + * If a route is ripped up during routing, non-configurable sets are left + * behind. As a result, the final routing may have stubs at + * non-configurable sets. This function tracks non-configurable set usage, + * and if the sets are unused, prunes them. */ void prune_unused_non_configurable_nets(CBRR& connections_inf, const Netlist<>& net_list); @@ -127,7 +136,10 @@ void try_graph(int width_fac, int num_directs, bool is_flat); -float update_pres_fac(float new_pres_fac); +/** This routine should take the new value of the present congestion factor + * and propagate it to all the relevant data fields in the vpr flow. + * Currently, it only updates the pres_fac used by the drawing functions. */ +float update_draw_pres_fac(float new_pres_fac); #ifndef NO_GRAPHICS /** Updates router iteration information and checks for router iteration and net id breakpoints diff --git a/vpr/src/route/router_stats.h b/vpr/src/route/router_stats.h index 45dff0ded77..4f999a722d1 100644 --- a/vpr/src/route/router_stats.h +++ b/vpr/src/route/router_stats.h @@ -52,6 +52,7 @@ struct RouterStats { size_t add_high_fanout_rt = 0; size_t add_all_rt = 0; + /** Add rhs's stats to mine */ void combine(RouterStats& rhs) { connections_routed += rhs.connections_routed; nets_routed += rhs.nets_routed;