Skip to content

Commit 9b44ae6

Browse files
committed
sched/fair: use static load in wake_affine_weight
For a long time runnable cpu load has been used in selecting the task rq when waking up tasks. Recent tests have shown that for a test load with a large quantity of short-running tasks and almost full cpu utilization, static load is more helpful. In our e2e tests, the runnable load avg of java threads ranges from less than 10 to as large as 362, while these java threads are no different from each other and should be treated in the same way. After switching to static load, a qps improvement has been seen in multiple test cases. A new sched feature WA_STATIC_WEIGHT is introduced here to control this behavior. Echo WA_STATIC_WEIGHT to /sys/kernel/debug/sched_features to turn static load in wake_affine_weight on, and NO_WA_STATIC_WEIGHT to turn it off. This feature is kept off by default. Tests were done on the following hardware: 4 threads Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz In tests with 120 threads and sql loglevel configured to info: NO_WA_STATIC_WEIGHT WA_STATIC_WEIGHT 33170.63 34614.95 (+4.35%) In tests with 160 threads and sql loglevel configured to info: NO_WA_STATIC_WEIGHT WA_STATIC_WEIGHT 35888.71 38247.20 (+6.57%) In tests with 160 threads and sql loglevel configured to warn: NO_WA_STATIC_WEIGHT WA_STATIC_WEIGHT 39118.72 39698.72 (+1.48%) Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com> Acked-by: Shanpei Chen <shanpeic@linux.alibaba.com>
1 parent e864d02 commit 9b44ae6

File tree

2 files changed

+69
-4
lines changed

2 files changed

+69
-4
lines changed

kernel/sched/fair.c

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
692692

693693
static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
694694
static unsigned long task_h_load(struct task_struct *p);
695+
static unsigned long task_h_load_static(struct task_struct *p);
695696

696697
/* Give new sched_entity start runnable values to heavy its load in infant time */
697698
void init_entity_runnable_average(struct sched_entity *se)
@@ -5645,25 +5646,41 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
56455646
s64 this_eff_load, prev_eff_load;
56465647
unsigned long task_load;
56475648

5648-
this_eff_load = target_load(this_cpu, sd->wake_idx);
5649+
if (sched_feat(WA_STATIC_WEIGHT))
5650+
this_eff_load =
5651+
scale_load_down(cpu_rq(this_cpu)->cfs.load.weight);
5652+
else
5653+
this_eff_load = target_load(this_cpu, sd->wake_idx);
56495654

56505655
if (sync) {
5651-
unsigned long current_load = task_h_load(current);
5656+
unsigned long current_load;
5657+
5658+
if (sched_feat(WA_STATIC_WEIGHT))
5659+
current_load = task_h_load_static(current);
5660+
else
5661+
current_load = task_h_load(current);
56525662

56535663
if (current_load > this_eff_load)
56545664
return this_cpu;
56555665

56565666
this_eff_load -= current_load;
56575667
}
56585668

5659-
task_load = task_h_load(p);
5669+
if (sched_feat(WA_STATIC_WEIGHT))
5670+
task_load = task_h_load_static(p);
5671+
else
5672+
task_load = task_h_load(p);
56605673

56615674
this_eff_load += task_load;
56625675
if (sched_feat(WA_BIAS))
56635676
this_eff_load *= 100;
56645677
this_eff_load *= capacity_of(prev_cpu);
56655678

5666-
prev_eff_load = source_load(prev_cpu, sd->wake_idx);
5679+
if (sched_feat(WA_STATIC_WEIGHT))
5680+
prev_eff_load =
5681+
scale_load_down(cpu_rq(prev_cpu)->cfs.load.weight);
5682+
else
5683+
prev_eff_load = source_load(prev_cpu, sd->wake_idx);
56675684
prev_eff_load -= task_load;
56685685
if (sched_feat(WA_BIAS))
56695686
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -7498,6 +7515,48 @@ static unsigned long task_h_load(struct task_struct *p)
74987515
return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
74997516
cfs_rq_load_avg(cfs_rq) + 1);
75007517
}
7518+
7519+
static void update_cfs_rq_h_load_static(struct cfs_rq *cfs_rq)
7520+
{
7521+
struct rq *rq = rq_of(cfs_rq);
7522+
struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
7523+
unsigned long now = jiffies;
7524+
unsigned long load;
7525+
7526+
if (cfs_rq->last_h_load_update == now)
7527+
return;
7528+
7529+
WRITE_ONCE(cfs_rq->h_load_next, NULL);
7530+
for_each_sched_entity(se) {
7531+
cfs_rq = cfs_rq_of(se);
7532+
WRITE_ONCE(cfs_rq->h_load_next, se);
7533+
if (cfs_rq->last_h_load_update == now)
7534+
break;
7535+
}
7536+
7537+
if (!se) {
7538+
cfs_rq->h_load = scale_load_down(cfs_rq->load.weight);
7539+
cfs_rq->last_h_load_update = now;
7540+
}
7541+
7542+
while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) {
7543+
load = cfs_rq->h_load;
7544+
load = div64_ul(load * se->load.weight,
7545+
cfs_rq->load.weight + 1);
7546+
cfs_rq = group_cfs_rq(se);
7547+
cfs_rq->h_load = load;
7548+
cfs_rq->last_h_load_update = now;
7549+
}
7550+
}
7551+
7552+
static unsigned long task_h_load_static(struct task_struct *p)
7553+
{
7554+
struct cfs_rq *cfs_rq = task_cfs_rq(p);
7555+
7556+
update_cfs_rq_h_load_static(cfs_rq);
7557+
return div64_ul(p->se.load.weight * cfs_rq->h_load,
7558+
cfs_rq->load.weight + 1);
7559+
}
75017560
#else
75027561
static inline void update_blocked_averages(int cpu)
75037562
{
@@ -7526,6 +7585,11 @@ static unsigned long task_h_load(struct task_struct *p)
75267585
{
75277586
return p->se.avg.load_avg;
75287587
}
7588+
7589+
static unsigned long task_h_load_static(struct task_struct *p)
7590+
{
7591+
return scale_load_down(p->se.load.weight);
7592+
}
75297593
#endif
75307594

75317595
/********** Helpers for find_busiest_group ************************/

kernel/sched/features.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
8585
SCHED_FEAT(WA_IDLE, true)
8686
SCHED_FEAT(WA_WEIGHT, true)
8787
SCHED_FEAT(WA_BIAS, true)
88+
SCHED_FEAT(WA_STATIC_WEIGHT, false)
8889

8990
/*
9091
* UtilEstimation. Use estimated CPU utilization.

0 commit comments

Comments
 (0)