@@ -3869,59 +3869,71 @@ bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
38693869
38703870void kvm_vcpu_on_spin (struct kvm_vcpu * me , bool yield_to_kernel_mode )
38713871{
3872+ int nr_vcpus , start , i , idx , yielded ;
38723873 struct kvm * kvm = me -> kvm ;
38733874 struct kvm_vcpu * vcpu ;
3874- int last_boosted_vcpu ;
3875- unsigned long i ;
3876- int yielded = 0 ;
38773875 int try = 3 ;
3878- int pass ;
38793876
3880- last_boosted_vcpu = READ_ONCE (kvm -> last_boosted_vcpu );
3877+ nr_vcpus = atomic_read (& kvm -> online_vcpus );
3878+ if (nr_vcpus < 2 )
3879+ return ;
3880+
3881+ /* Pairs with the smp_wmb() in kvm_vm_ioctl_create_vcpu(). */
3882+ smp_rmb ();
3883+
38813884 kvm_vcpu_set_in_spin_loop (me , true);
3885+
38823886 /*
3883- * We boost the priority of a VCPU that is runnable but not
3884- * currently running, because it got preempted by something
3885- * else and called schedule in __vcpu_run. Hopefully that
3886- * VCPU is holding the lock that we need and will release it.
3887- * We approximate round-robin by starting at the last boosted VCPU.
3887+ * The current vCPU ("me") is spinning in kernel mode, i.e. is likely
3888+ * waiting for a resource to become available. Attempt to yield to a
3889+ * vCPU that is runnable, but not currently running, e.g. because the
3890+ * vCPU was preempted by a higher priority task. With luck, the vCPU
3891+ * that was preempted is holding a lock or some other resource that the
3892+ * current vCPU is waiting to acquire, and yielding to the other vCPU
3893+ * will allow it to make forward progress and release the lock (or kick
3894+ * the spinning vCPU, etc).
3895+ *
3896+ * Since KVM has no insight into what exactly the guest is doing,
3897+ * approximate a round-robin selection by iterating over all vCPUs,
3898+ * starting at the last boosted vCPU. I.e. if N=kvm->last_boosted_vcpu,
3899+ * iterate over vCPU[N+1]..vCPU[N-1], wrapping as needed.
3900+ *
3901+ * Note, this is inherently racy, e.g. if multiple vCPUs are spinning,
3902+ * they may all try to yield to the same vCPU(s). But as above, this
3903+ * is all best effort due to KVM's lack of visibility into the guest.
38883904 */
3889- for (pass = 0 ; pass < 2 && !yielded && try ; pass ++ ) {
3890- kvm_for_each_vcpu (i , vcpu , kvm ) {
3891- if (!pass && i <= last_boosted_vcpu ) {
3892- i = last_boosted_vcpu ;
3893- continue ;
3894- } else if (pass && i > last_boosted_vcpu )
3895- break ;
3896- if (!READ_ONCE (vcpu -> ready ))
3897- continue ;
3898- if (vcpu == me )
3899- continue ;
3900- if (kvm_vcpu_is_blocking (vcpu ) && !vcpu_dy_runnable (vcpu ))
3901- continue ;
3905+ start = READ_ONCE (kvm -> last_boosted_vcpu ) + 1 ;
3906+ for (i = 0 ; i < nr_vcpus ; i ++ ) {
3907+ idx = (start + i ) % nr_vcpus ;
3908+ if (idx == me -> vcpu_idx )
3909+ continue ;
39023910
3903- /*
3904- * Treat the target vCPU as being in-kernel if it has a
3905- * pending interrupt, as the vCPU trying to yield may
3906- * be spinning waiting on IPI delivery, i.e. the target
3907- * vCPU is in-kernel for the purposes of directed yield.
3908- */
3909- if (READ_ONCE (vcpu -> preempted ) && yield_to_kernel_mode &&
3910- !kvm_arch_dy_has_pending_interrupt (vcpu ) &&
3911- !kvm_arch_vcpu_preempted_in_kernel (vcpu ))
3912- continue ;
3913- if (!kvm_vcpu_eligible_for_directed_yield (vcpu ))
3914- continue ;
3911+ vcpu = xa_load (& kvm -> vcpu_array , idx );
3912+ if (!READ_ONCE (vcpu -> ready ))
3913+ continue ;
3914+ if (kvm_vcpu_is_blocking (vcpu ) && !vcpu_dy_runnable (vcpu ))
3915+ continue ;
39153916
3916- yielded = kvm_vcpu_yield_to (vcpu );
3917- if (yielded > 0 ) {
3918- WRITE_ONCE (kvm -> last_boosted_vcpu , i );
3919- break ;
3920- } else if (yielded < 0 ) {
3921- try -- ;
3922- if (!try )
3923- break ;
3924- }
3917+ /*
3918+ * Treat the target vCPU as being in-kernel if it has a pending
3919+ * interrupt, as the vCPU trying to yield may be spinning
3920+ * waiting on IPI delivery, i.e. the target vCPU is in-kernel
3921+ * for the purposes of directed yield.
3922+ */
3923+ if (READ_ONCE (vcpu -> preempted ) && yield_to_kernel_mode &&
3924+ !kvm_arch_dy_has_pending_interrupt (vcpu ) &&
3925+ !kvm_arch_vcpu_preempted_in_kernel (vcpu ))
3926+ continue ;
3927+
3928+ if (!kvm_vcpu_eligible_for_directed_yield (vcpu ))
3929+ continue ;
3930+
3931+ yielded = kvm_vcpu_yield_to (vcpu );
3932+ if (yielded > 0 ) {
3933+ WRITE_ONCE (kvm -> last_boosted_vcpu , i );
3934+ break ;
3935+ } else if (yielded < 0 && !-- try ) {
3936+ break ;
39253937 }
39263938 }
39273939 kvm_vcpu_set_in_spin_loop (me , false);
0 commit comments