MIME-Version: 1.0
Received: from AM4PR07MB1298.eurprd07.prod.outlook.com (10.164.81.156) by
 DB5PR07MB1301.eurprd07.prod.outlook.com (10.164.41.155) with Microsoft SMTP
 Server (TLS) id 15.1.274.16 via Mailbox Transport; Sun, 20 Sep 2015 17:02:28
 +0000
Authentication-Results: linutronix.de; dkim=none (message not signed)
 header.d=none;linutronix.de; dmarc=none action=none
 header.from=numascale.com;
Received: from localhost.localdomain (175.156.157.249) by
 AM4PR07MB1298.eurprd07.prod.outlook.com (10.164.81.156) with Microsoft SMTP
 Server (TLS) id 15.1.274.16; Sun, 20 Sep 2015 17:02:24 +0000
From: Daniel J Blueman <daniel@numascale.com>
To: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@redhat.com>, "H.
 Peter Anvin" <hpa@zytor.com>
CC: Daniel J Blueman <daniel@numascale.com>, <x86@kernel.org>,
	<linux-kernel@vger.kernel.org>, Daniel Lezcano <daniel.lezcano@linaro.org>,
	Steffen Persvold <sp@numascale.com>
Subject: [PATCH 3/4] x86: Add Numachip IPI optimisations
Date: Mon, 21 Sep 2015 01:02:01 +0800
Message-ID: <1442768522-19217-3-git-send-email-daniel@numascale.com>
X-Mailer: git-send-email 2.5.0
In-Reply-To: <1442768522-19217-1-git-send-email-daniel@numascale.com>
References: <1442768522-19217-1-git-send-email-daniel@numascale.com>
Content-Type: text/plain
X-MS-Exchange-Organization-Network-Message-Id: 88606272-f5a0-4db5-8bd6-08d2c1dd4356
X-MS-Exchange-Organization-AuthSource: AM4PR07MB1298.eurprd07.prod.outlook.com
X-MS-Exchange-Organization-AuthAs: Internal
X-MS-Exchange-Organization-AuthMechanism: 06
X-Originating-IP: [175.156.157.249]
X-ClientProxiedBy: DB5PR03CA0064.eurprd03.prod.outlook.com (25.164.34.32) To
 AM4PR07MB1298.eurprd07.prod.outlook.com (25.164.81.156)
Return-Path: daniel@numascale.com
X-Microsoft-Exchange-Diagnostics: 1;AM4PR07MB1298;2:EQKLNuVDuwtnYCgAPQyQOUR5vJfb5ZUqWxu4kEE3/x0JnbH+gMzrIG+tDNi5SJqZKpZIJLNHUFoRQT2szIW5HQOA6rp9ufs9xe6W2bYnJST/YGAXbkfHkkr95zzNrIuc8n5hd1gpRWPW9piLStVmZQb0yHxLNgKof1T4KTk7iiA=;3:m9FBF4AN+L3FSFb/xMdVfVOxguZnkt0Q/SIM7RyrV6LOgmpg/wtZoKPRMmomfYi0pcoybguUVuzpuYK3KGYF1EdZunPOk9zPDnNM71Ahn/i33PMPCBoz73v9unU0UPkYYv2mJ++ic6EF0KMj5CZ/RA==;25:8GJ6scBG+BJ6hB13H52CJ6PWi7XiNcq2qMQfPT556TclLhsQ7wkh6WPhkTLAUn7XdA4a5X9iLXI1XgE2GruLcjejcZ8erSYUnJplLQCCZNOjiMKZFpUI88BH65eho/BoyzGTX7OGiG3CY+ny//P8EooWK7bMO/NZDwUHVj3XVQ+nlndjLX6UBxfNaNxnuuhzDV/bxQSfjK/VQ/4TF1o7fMXbjM9Ww9giGLdhemgR9oV+bHdbiQaoY9vnYjPfmkggg/4GtAzJidbyTjV0eAgi5g==;4:6oA5wzAlCOqW45xOf08RBIxPK8ID+w9y7kOW95MVo4FTP02mGVtYHjWK4uptzROTh5b1R666z5Po07yN71pAPd9SBnWsNmIgeY7uvtRaSE2+BfTtDyqxoikAPEExZBm3XTbhir8oFdyQKfmFukWEWFPHIQRr2VqtQcS4ocAV7x38jg1eA3CSZf9eyHFyk3/X+FggiGWW9kQmkwoHQ1jrnVouW3nEnDXBk4mGZuFn0EYX2LKhQtSKmlGciKyZdAsd
X-Microsoft-Antispam: UriScan:;BCL:0;PCL:0;RULEID:;SRVR:AM4PR07MB1298;
X-MS-Exchange-Organization-AVStamp-Service: 1.0
X-Exchange-Antispam-Report-CFA: BCL:0;PCL:0;RULEID:;SRVR:AM4PR07MB1298;BCL:0;PCL:0;RULEID:;SRVR:AM4PR07MB1298;
X-Forefront-Antispam-Report: SFV:SKI;SFS:;DIR:INB;SFP:;SCL:-1;SRVR:AM4PR07MB1298;H:localhost.localdomain;FPR:;SPF:None;LANG:en;
X-MS-Exchange-Organization-SCL: -1
X-Microsoft-Exchange-Diagnostics: 1;AM4PR07MB1298;23:JyIZ5Co8NI+Rb5VTmDXoPfu9HpRKOD1qe+iIsIU2rhu0TBvfxRkwta5WBaaA/4T4DuRH6mMpwiZb3a/9xfxQHzt175GpvzmnkLOtC2KkwOmU5alOMn7ZDU12jewTX3KeB3/w/iNNnB7Np0V3GJSskinvfz6g+x2uvRq621zRI/I6Fz09DjPBPkGWkThOavpp;5:cuJL1oZu6d8NaDKEvgFcw0K7SNmEMyE+y9QVKlNtnUE82wPaDwUhyYnFKzJCb83yS1mdaVh5zD7mR50ZA0MnziTb538SwaT1DYNwQXPrp+LgY1M1R++Ulns9CfONpreIixIlkb9tDBw2SEmXKBK9vg==;24:V7q5cKXCOI9Xanf8Xv6k0T1I8fhDaWIuGYcqOeu1MyXy0Y8jNtOipddrY7DRkzXxJsLKOyO/mIMVe1b8A4zTj8ZAnk85KPSb7jIbQYjaGBc=;20:gt05GaVxbshGaDhwL30zYX5japsbfzOzZTPoDTNnY8FVYn1rfKTOTaeFUeQ4hu7im/5diS5gkX6rXaAm+NcycQ==
X-MS-Exchange-CrossTenant-OriginalArrivalTime: 20 Sep 2015 17:02:24.5588
 (UTC)
X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted
X-MS-Exchange-Transport-CrossTenantHeadersStamped: AM4PR07MB1298
X-MS-Exchange-Organization-MessageDirectionality: Originating
X-MS-Exchange-Transport-EndToEndLatency: 00:00:03.9210298

When sending IPIs, first check if the non-local part of the source and
destination APIC IDs match; if so, send via the local APIC for efficiency.

Secondly, since the AMD BIOS and Kernel Developer's Guide states that IPI
delivery will occur regardless of prior delivery status, avoid polling the
delivery status bit for efficiency.

Signed-off-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Steffen Persvold <sp@numascale.com>
---
 arch/x86/include/asm/numachip/numachip_csr.h |  1 +
 arch/x86/kernel/apic/apic_numachip.c         | 36 ++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
index c7efc25..75379f6 100644
--- a/arch/x86/include/asm/numachip/numachip_csr.h
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -34,6 +34,7 @@
 #define NUMACHIP_LCSR_BASE	0x3ffffe000000ULL
 #define NUMACHIP_LCSR_LIM	0x3fffffffffffULL
 #define NUMACHIP_LCSR_SIZE	(NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
+#define NUMACHIP_LAPIC_BITS	8
 
 static inline void *lcsr_address(unsigned long offset)
 {
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index dfe2b1c..81bc216 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -95,9 +95,25 @@ static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 
 static void numachip_send_IPI_one(int cpu, int vector)
 {
-	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	int local_apicid, apicid = per_cpu(x86_cpu_to_apicid, cpu);
 	unsigned int dmode;
 
+	preempt_disable();
+	local_apicid = __this_cpu_read(x86_cpu_to_apicid);
+
+	/* Send via local APIC where non-local part matches */
+	if (!((apicid ^ local_apicid) >> NUMACHIP_LAPIC_BITS)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__default_send_IPI_dest_field(apicid, vector,
+			APIC_DEST_PHYSICAL);
+		local_irq_restore(flags);
+		preempt_enable();
+		return;
+	}
+	preempt_enable();
+
 	dmode = (vector == NMI_VECTOR) ? APIC_DM_NMI : APIC_DM_FIXED;
 	numachip_apic_icr_write(apicid, dmode | vector);
 }
@@ -217,6 +232,17 @@ static int numachip2_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
+/* APIC IPIs are queued, so there is no need to poll the delivery status */
+static void numachip_apic_wait_icr_idle(void)
+{
+}
+
+/* APIC NMI IPIs are queued, so return 0 without polling delivery status */
+static u32 numachip_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
+
 static const struct apic apic_numachip1 __refconst = {
 	.name				= "NumaConnect system",
 	.probe				= numachip1_probe,
@@ -262,8 +288,8 @@ static const struct apic apic_numachip1 __refconst = {
 	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+	.wait_icr_idle			= numachip_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,
 };
 
 apic_driver(apic_numachip1);
@@ -313,8 +339,8 @@ static const struct apic apic_numachip2 __refconst = {
 	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+	.wait_icr_idle			= numachip_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,
 };
 
 apic_driver(apic_numachip2);
-- 
2.5.0

