diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index 77eb679ca6..7a87605e6d 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -569,7 +569,7 @@ For people not working on CAD, you can probably leave all the options to their d **Default**: ``auto`` -.. option:: --alpha_clustering +.. option:: --timing_gain_weight A parameter that weights the optimization of timing vs area. @@ -577,7 +577,7 @@ For people not working on CAD, you can probably leave all the options to their d **Default**: ``0.75`` -.. option:: --beta_clustering +.. option:: --connection_gain_weight A tradeoff parameter that controls the optimization of smaller net absorption vs. the optimization of signal sharing. diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index f911039c18..b40dd89f18 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -570,15 +570,12 @@ void SetupPackerOpts(const t_options& Options, PackerOpts->doPacking = STAGE_DO; } - //TODO: document? 
- PackerOpts->global_clocks = true; /* DEFAULT */ - PackerOpts->allow_unrelated_clustering = Options.allow_unrelated_clustering; PackerOpts->connection_driven = Options.connection_driven_clustering; PackerOpts->timing_driven = Options.timing_driven_clustering; PackerOpts->cluster_seed_type = Options.cluster_seed_type; - PackerOpts->alpha = Options.alpha_clustering; - PackerOpts->beta = Options.beta_clustering; + PackerOpts->timing_gain_weight = Options.timing_gain_weight; + PackerOpts->connection_gain_weight = Options.connection_gain_weight; PackerOpts->pack_verbosity = Options.pack_verbosity; PackerOpts->enable_pin_feasibility_filter = Options.enable_clustering_pin_feasibility_filter; PackerOpts->balance_block_type_utilization = Options.balance_block_type_utilization; @@ -588,13 +585,10 @@ void SetupPackerOpts(const t_options& Options, PackerOpts->high_fanout_threshold = Options.pack_high_fanout_threshold; PackerOpts->transitive_fanout_threshold = Options.pack_transitive_fanout_threshold; PackerOpts->feasible_block_array_size = Options.pack_feasible_block_array_size; - PackerOpts->use_attraction_groups = Options.use_attraction_groups; PackerOpts->device_layout = Options.device_layout; PackerOpts->timing_update_type = Options.timing_update_type; - PackerOpts->pack_num_moves = Options.pack_num_moves; - PackerOpts->pack_move_type = Options.pack_move_type; } static void SetupNetlistOpts(const t_options& Options, t_netlist_opts& NetlistOpts) { diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index fba46d4818..bdabc71e78 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -732,8 +732,8 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) { } else { VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown packer allow_unrelated_clustering\n"); } - VTR_LOG("PackerOpts.alpha_clustering: %f\n", PackerOpts.alpha); - VTR_LOG("PackerOpts.beta_clustering: %f\n", PackerOpts.beta); + VTR_LOG("PackerOpts.timing_gain_weight: %f\n", 
PackerOpts.timing_gain_weight); + VTR_LOG("PackerOpts.connection_gain_weight: %f\n", PackerOpts.connection_gain_weight); VTR_LOG("PackerOpts.cluster_seed_type: "); switch (PackerOpts.cluster_seed_type) { case e_cluster_seed::TIMING: @@ -758,7 +758,6 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) { VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown packer cluster_seed_type\n"); } VTR_LOG("PackerOpts.connection_driven: %s", (PackerOpts.connection_driven ? "true\n" : "false\n")); - VTR_LOG("PackerOpts.global_clocks: %s", (PackerOpts.global_clocks ? "true\n" : "false\n")); VTR_LOG("PackerOpts.timing_driven: %s", (PackerOpts.timing_driven ? "true\n" : "false\n")); VTR_LOG("PackerOpts.target_external_pin_util: %s", vtr::join(PackerOpts.target_external_pin_util, " ").c_str()); VTR_LOG("\n"); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 41d1af3380..6c741c12f6 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1972,14 +1972,14 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("auto") .show_in(argparse::ShowIn::HELP_ONLY); - pack_grp.add_argument(args.alpha_clustering, "--alpha_clustering") + pack_grp.add_argument(args.timing_gain_weight, "--timing_gain_weight") .help( "Parameter that weights the optimization of timing vs area. 0.0 focuses solely on" " area, 1.0 solely on timing.") .default_value("0.75") .show_in(argparse::ShowIn::HELP_ONLY); - pack_grp.add_argument(args.beta_clustering, "--beta_clustering") + pack_grp.add_argument(args.connection_gain_weight, "--connection_gain_weight") .help( "Parameter that weights the absorption of small nets vs signal sharing." " 0.0 focuses solely on sharing, 1.0 solely on small net absoprtion." 
@@ -2101,24 +2101,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("2") .show_in(argparse::ShowIn::HELP_ONLY); - pack_grp.add_argument(args.use_attraction_groups, "--use_attraction_groups") - .help("Whether attraction groups are used to make it easier to pack primitives in the same floorplan region together.") - .default_value("on") - .show_in(argparse::ShowIn::HELP_ONLY); - - pack_grp.add_argument(args.pack_num_moves, "--pack_num_moves") - .help( - "The number of moves that can be tried in packing stage") - .default_value("100000") - .show_in(argparse::ShowIn::HELP_ONLY); - - pack_grp.add_argument(args.pack_move_type, "--pack_move_type") - .help( - "The move type used in packing." - "The available values are: randomSwap, semiDirectedSwap, semiDirectedSameTypeSwap") - .default_value("semiDirectedSwap") - .show_in(argparse::ShowIn::HELP_ONLY); - auto& place_grp = parser.add_argument_group("placement options"); place_grp.add_argument(args.Seed, "--seed") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 13c0d93f3f..fc5e4889ad 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -108,8 +108,8 @@ struct t_options { /* Clustering options */ argparse::ArgValue connection_driven_clustering; argparse::ArgValue allow_unrelated_clustering; - argparse::ArgValue alpha_clustering; - argparse::ArgValue beta_clustering; + argparse::ArgValue timing_gain_weight; + argparse::ArgValue connection_gain_weight; argparse::ArgValue timing_driven_clustering; argparse::ArgValue cluster_seed_type; argparse::ArgValue enable_clustering_pin_feasibility_filter; @@ -120,9 +120,6 @@ struct t_options { argparse::ArgValue pack_feasible_block_array_size; argparse::ArgValue> pack_high_fanout_threshold; argparse::ArgValue pack_verbosity; - argparse::ArgValue use_attraction_groups; - argparse::ArgValue pack_num_moves; - argparse::ArgValue pack_move_type; /* Placement options */ argparse::ArgValue 
Seed; argparse::ArgValue ShowPlaceTiming; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 2c84a6a768..0d83e5fc17 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -1400,8 +1400,7 @@ bool vpr_analysis_flow(const Netlist<>& net_list, } std::string post_routing_packing_output_file_name = vpr_setup.PackerOpts.output_file + ".post_routing"; - write_packing_results_to_xml(vpr_setup.PackerOpts.global_clocks, - Arch.architecture_id, + write_packing_results_to_xml(Arch.architecture_id, post_routing_packing_output_file_name.c_str()); } else { VTR_LOG_WARN("Synchronization between packing and routing results is not applied due to illegal circuit implementation\n"); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 106070c4c9..07257e398b 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -705,17 +705,75 @@ enum e_stage_action { /** * @brief Options for packing * - * TODO: document each packing parameter + * @param circuit_file_name + * Path to technology mapped user circuit in BLIF format. + * @param output_file + * Path to packed user circuit in net format. + * @param timing_driven + * Whether or not to do timing driven clustering. (Default: on) + * @param timing_gain_weight + * Controls the optimization of timing vs area in timing driven + * clustering. + * A value of 0 focuses only on area; 1 focuses only on timing. + * (Default: 0.75) + * @param connection_gain_weight + * Controls the optimization of smaller net absorption vs. signal + * sharing in connection driven clustering. + * A value of 0 focuses solely on signal sharing; a value of 1 + * focuses solely on absorbing smaller nets into a cluster. + * (Default: 0.9) + * @param cluster_seed_type + * Selection algorithm for selecting next seed. (Default: blend2 if + * timing_driven is on; max_inputs otherwise) + * @param target_device_utilization + * Sets the target device utilization. 
(Default: 1.0) + * @param allow_unrelated_clustering + * Allows primitives which have no attraction to the given cluster + * to be packed into it. (Default: auto) + * @param connection_driven + * Controls whether or not packing prioritizes the absorption of nets + * with fewer connections into a complex logic block over nets with + * more connections. (Default: on) + * @param pack_verbosity + * Controls how verbose clustering's output is. (Default: 2) + * @param enable_pin_feasibility_filter + * Counts the number of available pins in groups/classes of mutually + * connected pins within a cluster, then filters out candidate + * primitives/atoms/molecules for which the cluster has insufficient + * pins to route (without performing a full routing). (Default: on) + * @param balance_block_type_utilization + * If enabled, when a primitive can potentially be mapped to multiple + * block types the packer will pick the block type which (currently) + * has the lowest utilization. (Default: auto) + * @param target_external_pin_util + * Sets the external pin utilization target. (Default: auto) + * @param prioritize_transitive_connectivity + * Whether transitive connectivity is prioritized over high-fanout + * connectivity. (Default: on) + * @param feasible_block_array_size + * Max size of the priority queue for candidates that pass the early + * filter legality test, but not the more detailed routing test. + * (Default: 30) + * @param doPacking + * Run packing stage. + * @param device_layout + * Controls which device layout/floorplan is used from the + * architecture file. (Default: smallest device which satisfies the + * circuit's resource requirements) + * @param timing_update_type + * Controls how timing analysis updates are performed. (Default: auto) + * @param load_flat_placement + * Whether to reconstruct a packing solution from a flat placement + * file. 
(Default: off; on if the --legalize stage option is on) */ struct t_packer_opts { std::string circuit_file_name; std::string sdc_file_name; std::string output_file; - bool global_clocks; bool timing_driven; enum e_cluster_seed cluster_seed_type; - float alpha; - float beta; + float timing_gain_weight; + float connection_gain_weight; float target_device_utilization; e_unrelated_clustering allow_unrelated_clustering; bool connection_driven; @@ -730,9 +788,6 @@ struct t_packer_opts { e_stage_action doPacking; std::string device_layout; e_timing_update_type timing_update_type; - bool use_attraction_groups; - int pack_num_moves; - std::string pack_move_type; bool load_flat_placement = false; }; diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index e3361971b3..1ed359ca0c 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -77,7 +77,6 @@ void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, } output_clustering(&cluster_legalizer, - packer_opts.global_clocks, is_clock, arch->architecture_id, packer_opts.output_file.c_str(), diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp index 10255890d6..199894b8d2 100644 --- a/vpr/src/pack/greedy_candidate_selector.cpp +++ b/vpr/src/pack/greedy_candidate_selector.cpp @@ -299,7 +299,7 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success( AtomNetId net_id = atom_netlist_.pin_net(pin_id); e_gain_update gain_flag = e_gain_update::NO_GAIN; - if (!is_clock_.count(net_id) || !packer_opts_.global_clocks) + if (!is_clock_.count(net_id)) gain_flag = e_gain_update::GAIN; mark_and_update_partial_gain(cluster_gain_stats, @@ -327,13 +327,9 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success( for (AtomPinId pin_id : atom_netlist_.block_clock_pins(blk_id)) { AtomNetId net_id = atom_netlist_.pin_net(pin_id); - e_gain_update gain_flag = e_gain_update::GAIN; - if (packer_opts_.global_clocks) - gain_flag = 
e_gain_update::NO_GAIN; - mark_and_update_partial_gain(cluster_gain_stats, net_id, - gain_flag, + e_gain_update::NO_GAIN, blk_id, cluster_legalizer, high_fanout_net_threshold, @@ -623,9 +619,9 @@ void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_s VTR_ASSERT(num_used_pins > 0); if (packer_opts_.connection_driven) { /*try to absorb as many connections as possible*/ - cluster_gain_stats.gain[blk_id] = ((1 - packer_opts_.beta) + cluster_gain_stats.gain[blk_id] = ((1 - packer_opts_.connection_gain_weight) * (float)cluster_gain_stats.sharing_gain[blk_id] - + packer_opts_.beta * (float)cluster_gain_stats.connection_gain[blk_id]) + + packer_opts_.connection_gain_weight * (float)cluster_gain_stats.connection_gain[blk_id]) / (num_used_pins); } else { cluster_gain_stats.gain[blk_id] = ((float)cluster_gain_stats.sharing_gain[blk_id]) @@ -634,9 +630,9 @@ void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_s /* Add in timing driven cost into cost function */ if (packer_opts_.timing_driven) { - cluster_gain_stats.gain[blk_id] = packer_opts_.alpha + cluster_gain_stats.gain[blk_id] = packer_opts_.timing_gain_weight * cluster_gain_stats.timing_gain[blk_id] - + (1.0 - packer_opts_.alpha) * (float)cluster_gain_stats.gain[blk_id]; + + (1.0 - packer_opts_.timing_gain_weight) * (float)cluster_gain_stats.gain[blk_id]; } } } diff --git a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index 28582294ce..9cdc27ac39 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -640,7 +640,7 @@ static void clustering_xml_blocks_from_netlist(pugi::xml_node& block_node, /* This routine dumps out the output netlist in a format suitable for * * input to vpr. This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. 
*/ -void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { +void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { const DeviceContext& device_ctx = g_vpr_ctx.device(); const AtomNetlist& atom_nlist = g_vpr_ctx.atom().netlist(); @@ -689,17 +689,15 @@ void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_cloc block_node.append_child("inputs").text().set(vtr::join(inputs.begin(), inputs.end(), " ").c_str()); block_node.append_child("outputs").text().set(vtr::join(outputs.begin(), outputs.end(), " ").c_str()); - if (global_clocks) { - std::vector clocks; - for (auto net_id : atom_nlist.nets()) { - if (is_clock.count(net_id)) { - clocks.push_back(atom_nlist.net_name(net_id)); - } + std::vector clocks; + for (auto net_id : atom_nlist.nets()) { + if (is_clock.count(net_id)) { + clocks.push_back(atom_nlist.net_name(net_id)); } - - block_node.append_child("clocks").text().set(vtr::join(clocks.begin(), clocks.end(), " ").c_str()); } + block_node.append_child("clocks").text().set(vtr::join(clocks.begin(), clocks.end(), " ").c_str()); + if (skip_clustering == false) { if (from_legalizer) { VTR_ASSERT(cluster_legalizer_ptr != nullptr); @@ -724,15 +722,13 @@ void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_cloc * As such, this function is expected to be a standard API * which can be called anytime and anywhere after packing is finished. 
********************************************************************/ -void write_packing_results_to_xml(const bool& global_clocks, - const std::string& architecture_id, +void write_packing_results_to_xml(const std::string& architecture_id, const char* out_fname) { std::unordered_set is_clock = alloc_and_load_is_clock(); // Since the cluster legalizer is not being used to output the clustering // (from_legalizer is false), passing in nullptr. output_clustering(nullptr, - global_clocks, is_clock, architecture_id, out_fname, diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 92d734248d..c7537ee8c3 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -17,15 +17,13 @@ class ClusterLegalizer; /// clustered netlist. If from_legalizer is false, the clustered netlist currently /// in the global scope will be used. void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, - bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer); -void write_packing_results_to_xml(const bool& global_clocks, - const std::string& architecture_id, +void write_packing_results_to_xml(const std::string& architecture_id, const char* out_fname); #endif