diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index f9c8d9f..3b36127 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -414,7 +414,7 @@ EXPORT_SYMBOL(sn_send_IPI_phys);
  * %IA64_IPI_DM_NMI - pend an NMI
  * %IA64_IPI_DM_INIT - pend an INIT interrupt
  */
-void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
+void sn2_send_IPI_one(int cpuid, int vector, int delivery_mode, int redirect)
 {
 	long physid;
 	int nasid;
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index ce7aea3..917ec4d 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -104,7 +104,7 @@ static void send_IPI_mask(const struct cpumask *, int, int);
 void smp_send_reschedule(int cpu_id)
 {
 	WARN_ON(cpu_is_offline(cpu_id));
-	send_IPI_mask(cpumask_of(cpu_id), RESCHEDULE_IPI, 1);
+	send_IPI_one(cpu_id, RESCHEDULE_IPI, 1);
 }
 
 /*==========================================================================*
@@ -542,7 +542,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_IPI, 0);
+	send_IPI_one(cpu, CALL_FUNC_SINGLE_IPI, 0);
 }
 
 /*==========================================================================*
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193..34d69d4 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -334,7 +334,7 @@ static void send_IPI_mask(const cpumask_t *cpumask, int irq)
  */
 void send_IPI_self(int irq)
 {
-	send_IPI_mask(cpumask_of(smp_processor_id()), irq);
+	send_IPI_one(smp_processor_id(), irq);
 }
 
 /**
@@ -362,7 +362,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_IPI);
+	send_IPI_one(cpu, CALL_FUNC_SINGLE_IPI);
 }
 
 /**
@@ -371,7 +371,7 @@ void arch_send_call_function_single_ipi(int cpu)
  */
 void smp_send_reschedule(int cpu)
 {
-	send_IPI_mask(cpumask_of(cpu), RESCHEDULE_IPI);
+	send_IPI_one(cpu, RESCHEDULE_IPI);
 }
 
 /**
@@ -773,7 +773,7 @@ static int __init do_boot_cpu(int phy_id)
 	task_thread_info(idle)->cpu = cpu_id;
 
 	/* Send boot IPI to AP */
-	send_IPI_mask(cpumask_of(phy_id), SMP_BOOT_IRQ);
+	send_IPI_one(phy_id, SMP_BOOT_IRQ);
 
 	Dprintk("Waiting for send to finish...\n");
 
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22..8c89eaa 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -304,6 +304,7 @@ struct apic {
 				      unsigned int *apicid);
 
 	/* ipi */
+	void (*send_IPI_one)(int cpu, int vector);
 	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
 	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
 					 int vector);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 615fa90..5483e9d 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -119,6 +119,7 @@ static inline void
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
+extern void default_send_IPI_one(int cpu, int vector);
 extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
 						 int vector);
 extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
@@ -146,6 +147,7 @@ static inline void __default_local_send_IPI_all(int vector)
 }
 
 #ifdef CONFIG_X86_32
+extern void default_send_IPI_one(int cpu, int vector);
 extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 							 int vector);
 extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index de918c4..2713380 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,6 +185,7 @@ static struct apic apic_flat =  {
 
 	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= default_send_IPI_one,
 	.send_IPI_mask			= flat_send_IPI_mask,
 	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= flat_send_IPI_allbutself,
@@ -293,6 +294,7 @@ static struct apic apic_physflat =  {
 
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= default_send_IPI_one,
 	.send_IPI_mask			= physflat_send_IPI_mask,
 	.send_IPI_mask_allbutself	= physflat_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= physflat_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index b205cdb..969c593 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -30,6 +30,7 @@
 #include <asm/e820.h>
 
 static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_one(int cpu, int vector) { }
 static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
 static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
 static void noop_send_IPI_allbutself(int vector) { }
@@ -144,6 +145,7 @@ struct apic apic_noop = {
 
 	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= noop_send_IPI_one,
 	.send_IPI_mask			= noop_send_IPI_mask,
 	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= noop_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index d6800c2..d9c90da 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -249,6 +249,7 @@ static const struct apic apic_numachip __refconst = {
 
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= numachip_send_IPI_one,
 	.send_IPI_mask			= numachip_send_IPI_mask,
 	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= numachip_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 6207156..1d2f801 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,15 @@
 #include <asm/proto.h>
 #include <asm/ipi.h>
 
+void default_send_IPI_one(int cpu, int vector)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu), vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
+}
+
 void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
 {
 	unsigned long query_cpu;
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index bda4886..3265d6b 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,6 +105,7 @@ static struct apic apic_default = {
 
 	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= default_send_IPI_one_logical,
 	.send_IPI_mask			= default_send_IPI_mask_logical,
 	.send_IPI_mask_allbutself	= default_send_IPI_mask_allbutself_logical,
 	.send_IPI_allbutself		= default_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6cedd79..3d11bd6 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -327,7 +327,7 @@ int apic_retrigger_irq(struct irq_data *data)
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
-	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
+	apic->send_IPI_one(cpu, cfg->vector);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -383,8 +383,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 		unsigned int i;
 
 		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
+			apic->send_IPI_one(i, IRQ_MOVE_CLEANUP_VECTOR);
 	} else {
 		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e658f21..413afa0 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -24,6 +24,53 @@ static inline u32 x2apic_cluster(int cpu)
 }
 
 static void
+x2apic_send_IPI_one(int cpu, int vector)
+{
+	struct cpumask *cpus_in_cluster_ptr;
+	struct cpumask *ipi_mask_ptr;
+	unsigned int this_cpu;
+	unsigned long flags;
+	u32 dest;
+
+	x2apic_wrmsr_fence();
+
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+
+	/*
+	 * We are to modify mask, so we need an own copy
+	 * and be sure it's manipulated with irq off.
+	 */
+	ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
+	cpumask_clear(ipi_mask_ptr);
+	cpumask_set_cpu(cpu, ipi_mask_ptr);
+
+	/*
+	 * The idea is to send one IPI per cluster.
+	 */
+	{
+		unsigned long i;
+
+		cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
+		dest = 0;
+
+		/* Collect cpus in cluster. */
+		for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr)
+				dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+
+		__x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
+		/*
+		 * Cluster sibling cpus should be discared now so
+		 * we would not send IPI them second time.
+		 */
+		cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
+	}
+
+	local_irq_restore(flags);
+}
+
+static void
 __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 {
 	struct cpumask *cpus_in_cluster_ptr;
@@ -266,6 +313,7 @@ static struct apic apic_x2apic_cluster = {
 
 	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= x2apic_send_IPI_one,
 	.send_IPI_mask			= x2apic_send_IPI_mask,
 	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 6fae733..5890009 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -35,6 +35,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 
 static void
+x2apic_send_IPI_one(int cpu, int vector)
+{
+	unsigned long flags;
+
+	x2apic_wrmsr_fence();
+
+	local_irq_save(flags);
+	__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, cpu), vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
+}
+
+static void
 __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 {
 	unsigned long query_cpu;
@@ -120,6 +132,7 @@ static struct apic apic_x2apic_phys = {
 
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= x2apic_send_IPI_one,
 	.send_IPI_mask			= x2apic_send_IPI_mask,
 	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8e9dcfd..1b2beb1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -380,6 +380,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
 
 	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,
 
+	.send_IPI_one			= uv_send_IPI_one,
 	.send_IPI_mask			= uv_send_IPI_mask,
 	.send_IPI_mask_allbutself	= uv_send_IPI_mask_allbutself,
 	.send_IPI_allbutself		= uv_send_IPI_allbutself,
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index be8e1bd..6bb10cd 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -124,12 +124,12 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+	apic->send_IPI_one(cpu, RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	apic->send_IPI_one(cpu, CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 void native_send_call_func_ipi(const struct cpumask *mask)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ae4f6d3..f0606e6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4377,8 +4377,7 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
 	if (vcpu->mode == IN_GUEST_MODE) {
-		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
-				POSTED_INTR_VECTOR);
+		apic->send_IPI_one(vcpu->cpu, POSTED_INTR_VECTOR);
 		return true;
 	}
 #endif
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5240f56..8fc4ea2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1003,6 +1003,7 @@ static void set_xen_basic_apic_ops(void)
 	apic->get_apic_id = xen_get_apic_id;
 
 #ifdef CONFIG_SMP
+	apic->send_IPI_one = xen_send_IPI_one;
 	apic->send_IPI_allbutself = xen_send_IPI_allbutself;
 	apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself;
 	apic->send_IPI_mask = xen_send_IPI_mask;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 08e8489..ba39465 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -599,8 +599,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	__xen_send_IPI_mask(cpumask_of(cpu),
-			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
+	__xen_send_IPI_one(cpu, XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 static inline int xen_map_vector(int vector)
