Increase the futex hash table size from 256 to 1024 buckets per possible CPU for systems with a large core count.

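Avoid an unnecessary atomic read-modify-write (RMW) in the RCU tree code by sampling the dynticks counter with atomic_read() instead of atomic_add_return(0, ...). Note that atomic_read() does not imply the full memory barrier that atomic_add_return() provides.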

Finally, allow RCU more time between bouts of quiescent-state forcing.

Signed-off-by: Daniel J Blueman <daniel@quora.org>

diff --git a/kernel/futex.c b/kernel/futex.c
index 63678b5..d112be7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3030,7 +3030,7 @@ static int __init futex_init(void)
 #if CONFIG_BASE_SMALL
 	futex_hashsize = 16;
 #else
-	futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
+	futex_hashsize = roundup_pow_of_two(1024 * num_possible_cpus());
 #endif
 
 	futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7680fc2..9330689 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -892,7 +892,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 static int dyntick_save_progress_counter(struct rcu_data *rdp,
 					 bool *isidle, unsigned long *maxj)
 {
-	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	rdp->dynticks_snap = atomic_read(&rdp->dynticks->dynticks);
 	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	if ((rdp->dynticks_snap & 0x1) == 0) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e7b184..90fdb32 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -392,7 +392,7 @@ struct rcu_data {
 					/* For jiffies_till_first_fqs and */
 					/*  and jiffies_till_next_fqs. */
 
-#define RCU_JIFFIES_FQS_DIV	256	/* Very large systems need more */
+#define RCU_JIFFIES_FQS_DIV	256	/* Very large systems need more */
 					/*  delay between bouts of */
 					/*  quiescent-state forcing. */
 
