From e89db675171a7a12f19b6ec0089a9cc62807cdf1 Mon Sep 17 00:00:00 2001
From: Camelia Groza <camelia.groza@nxp.com>
Date: Tue, 29 Oct 2019 16:34:08 +0200
Subject: [PATCH] sdk_dpaa: ls1043a errata: update and optimize the
 restrictions

An skb is in no danger of triggering the errata under the following
conditions (sketched in the code example below):
- the paged data doesn't cross a 4K page boundary, and the linear data
  is aligned to 256 bytes whenever it does cross one
- the linear and the paged data are 16 byte aligned
- the paged data is a multiple of 16 bytes in size, except for the last
  fragment
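
For illustration, the boundary and alignment rules can be modelled in
user space as in the minimal sketch below, assuming 4K pages. CROSS_4K
mirrors the driver macro introduced further down; region_is_safe() is a
hypothetical helper used only for this example, not driver code. Note
that it covers only the boundary/alignment rules, not the size rules:

  #include <assert.h>
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  /* True when [start, start + size) extends past the next 4K boundary */
  #define CROSS_4K(start, size) \
  	(((uintptr_t)(start) + (size)) > \
  	 (((uintptr_t)(start) + 0x1000) & ~0xFFF))

  /* Hypothetical check: can this buffer be transmitted safely? */
  static bool region_is_safe(const void *start, size_t size, bool is_frag)
  {
  	/* Both linear and paged data must be 16 byte aligned */
  	if ((uintptr_t)start % 16)
  		return false;
  	/* Fragments must never cross a 4K boundary; linear data may
  	 * cross one only when aligned to 256 bytes
  	 */
  	if (CROSS_4K(start, size))
  		return is_frag ? false : !((uintptr_t)start % 256);
  	return true;
  }

  int main(void)
  {
  	/* 0xf00 is 256 byte aligned: a 4K crossing is tolerated for
  	 * linear data but not for a fragment
  	 */
  	assert(region_is_safe((void *)0xf00, 0x200, false));
  	assert(!region_is_safe((void *)0xf00, 0x200, true));
  	return 0;
  }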

Optimize the detection for each skb that might trigger the errata: parse
the skb at most twice and realign it at most once.
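
The resulting flow is summarized in the sketch below. check_skb(),
realign_skb() and rework_skb() are hypothetical stand-ins for
a010022_check_skb(), a010022_realign_skb() and the copy/unshare/
linearize steps in dpa_tx_extended(); error handling and the MTU
padding path are elided:

  #include <stdbool.h>

  struct skb;	/* opaque stand-in for struct sk_buff */

  bool check_skb(struct skb *skb);
  struct skb *realign_skb(struct skb *skb);
  struct skb *rework_skb(struct skb *skb, bool *changed);

  struct skb *prepare_tx_skb(struct skb *skb)
  {
  	bool changed = false;
  	bool need_wa = check_skb(skb);		/* parse #1 */

  	/* Skip the copy/linearize step when a realignment is already
  	 * pending; realigning yields a fresh, private, linear skb anyway
  	 */
  	if (!need_wa)
  		skb = rework_skb(skb, &changed);

  	if (changed && check_skb(skb))		/* parse #2, at most */
  		need_wa = true;

  	if (need_wa)
  		skb = realign_skb(skb);		/* realign at most once */

  	return skb;
  }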

Signed-off-by: Camelia Groza <camelia.groza@nxp.com>
---
 drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h |   2 +-
 .../net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c  | 147 +++++++++++++++------
 2 files changed, 111 insertions(+), 38 deletions(-)
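
Note: for reference, the per-fragment rules enforced by the loop in
a010022_check_skb() below can be modelled in user space as in this
sketch; struct frag and frags_trigger_errata() are illustrative
stand-ins for skb_frag_off()/skb_frag_size() and the driver loop, not
part of the patch:

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  /* Same boundary test as the CROSS_4K driver macro */
  #define CROSS_4K(start, size) \
  	(((uintptr_t)(start) + (size)) > \
  	 (((uintptr_t)(start) + 0x1000) & ~0xFFF))

  struct frag {
  	uintptr_t off;	/* stands in for skb_frag_off() */
  	size_t size;	/* stands in for skb_frag_size() */
  };

  bool frags_trigger_errata(const struct frag *frags, int nr_frags)
  {
  	for (int i = 0; i < nr_frags; i++) {
  		/* All but the last fragment must be a multiple of 16B */
  		if (i != nr_frags - 1 && frags[i].size % 16)
  			return true;
  		/* Every fragment must be 16 byte aligned */
  		if (frags[i].off % 16)
  			return true;
  		/* No fragment may cross a 4K boundary */
  		if (CROSS_4K(frags[i].off, frags[i].size))
  			return true;
  	}
  	return false;
  }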

--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h
@@ -662,7 +662,7 @@ static inline void _dpa_bp_free_pf(void
 #ifndef CONFIG_PPC
 extern bool dpaa_errata_a010022; /* SoC affected by A010022 errata */
 #define NONREC_MARK	0x01
-#define HAS_DMA_ISSUE(start, size) \
+#define CROSS_4K(start, size) \
 	(((uintptr_t)(start) + (size)) > \
 	 (((uintptr_t)(start) + 0x1000) & ~0xFFF))
 /* The headroom needs to accommodate our private data (64 bytes) but
--- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
+++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c
@@ -771,32 +771,73 @@ int __hot skb_to_contig_fd(struct dpa_pr
 EXPORT_SYMBOL(skb_to_contig_fd);
 
 #ifndef CONFIG_PPC
-/* Verify the conditions that trigger the A010022 errata: data unaligned to
- * 16 bytes, 4K memory address crossings and S/G fragments.
+/* Verify the conditions that trigger the A010022 errata:
+ * - 4K memory address boundary crossings when the data isn't aligned to
+ *   256 bytes (SG fragments must never cross a 4K boundary)
+ * - data and SG fragments that aren't aligned to 16 bytes
+ * - SG fragments that aren't a multiple of 16 bytes in size (except
+ *   for the last fragment)
  */
 static bool a010022_check_skb(struct sk_buff *skb, struct dpa_priv_s *priv)
 {
-	/* Check if the headroom is aligned */
-	if (((uintptr_t)skb->data - priv->tx_headroom) %
-	    priv->buf_layout[TX].data_align != 0)
-		return true;
+	skb_frag_t *frag;
+	int i, nr_frags;
 
-	/* Check for paged data in the skb. We do not support S/G fragments */
-	if (skb_is_nonlinear(skb))
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	/* Check if the linear data is 16 byte aligned */
+	if ((uintptr_t)skb->data % 16)
 		return true;
 
-	/* Check if the headroom crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->head, skb_headroom(skb)))
+	/* Check if the needed headroom crosses a 4K address boundary without
+	 * being 256 byte aligned
+	 */
+	if (CROSS_4K(skb->data - priv->tx_headroom, priv->tx_headroom) &&
+	    (((uintptr_t)skb->data - priv->tx_headroom) % 256))
 		return true;
 
-	/* Check if the non-paged data crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->data, skb_headlen(skb)))
+	/* Check if the linear data crosses a 4K address boundary without
+	 * being 256 byte aligned
+	 */
+	if (CROSS_4K(skb->data, skb_headlen(skb)) &&
+	    ((uintptr_t)skb->data % 256))
 		return true;
 
-	/* Check if the entire linear skb crosses a boundary */
-	if (HAS_DMA_ISSUE(skb->head, skb_end_offset(skb)))
+	/* When using Scatter/Gather, the linear data becomes the first
+	 * fragment in the list and must follow the same restrictions as the
+	 * other fragments.
+	 *
+	 * Check if the linear data is a multiple of 16 bytes in size.
+	 */
+	if (nr_frags && (skb_headlen(skb) % 16))
 		return true;
 
+	/* Check the SG fragments. They must follow the same rules as the
+	 * linear data with an additional restriction: they must be a multiple
+	 * of 16 bytes in size to account for the hardware carryover effect.
+	 */
+	for (i = 0; i < nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+
+		/* Check if the fragment is a multiple of 16 bytes in size.
+		 * The last fragment is exempt from this restriction.
+		 */
+		if ((i != (nr_frags - 1)) && (skb_frag_size(frag) % 16))
+			return true;
+
+		/* Check if the fragment is 16 byte aligned */
+		if (skb_frag_off(frag) % 16)
+			return true;
+
+		/* Check if the fragment crosses a 4K address boundary. Since
+		 * the alignment of previous fragments can influence the
+		 * current fragment, checking for the 256 byte alignment
+		 * isn't relevant.
+		 */
+		if (CROSS_4K(skb_frag_off(frag), skb_frag_size(frag)))
+			return true;
+	}
+
 	return false;
 }
 
@@ -1062,10 +1103,24 @@ int __hot dpa_tx_extended(struct sk_buff
 	struct dpa_percpu_priv_s *percpu_priv;
 	struct rtnl_link_stats64 *percpu_stats;
 	int err = 0;
-	bool nonlinear;
+	bool nonlinear, skb_changed, skb_need_wa;
 	int *countptr, offset = 0;
 	struct sk_buff *nskb;
 
+	/* Flags to help optimize the A010022 errata restriction checks.
+	 *
+	 * The first flag marks if the skb changed between the first A010022
+	 * check and the moment it's converted to an FD.
+	 *
+	 * The second flag marks if the skb needs to be realigned in order to
+	 * avoid the errata.
+	 *
+	 * The flags should have minimal impact on platforms not affected by
+	 * the errata.
+	 */
+	skb_changed = false;
+	skb_need_wa = false;
+
 	priv = netdev_priv(net_dev);
 	/* Non-migratable context, safe to use raw_cpu_ptr */
 	percpu_priv = raw_cpu_ptr(priv->percpu_priv);
@@ -1075,13 +1130,8 @@ int __hot dpa_tx_extended(struct sk_buff
 	clear_fd(&fd);
 
 #ifndef CONFIG_PPC
-	if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv)) {
-		nskb = a010022_realign_skb(skb, priv);
-		if (!nskb)
-			goto skb_to_fd_failed;
-		dev_kfree_skb(skb);
-		skb = nskb;
-	}
+	if (unlikely(dpaa_errata_a010022) && a010022_check_skb(skb, priv))
+		skb_need_wa = true;
 #endif
 
 	nonlinear = skb_is_nonlinear(skb);
@@ -1102,8 +1152,8 @@ int __hot dpa_tx_extended(struct sk_buff
 	 * Btw, we're using the first sgt entry to store the linear part of
 	 * the skb, so we're one extra frag short.
 	 */
-	if (nonlinear &&
-		likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
+	if (nonlinear && !skb_need_wa &&
+	    likely(skb_shinfo(skb)->nr_frags < DPA_SGT_MAX_ENTRIES)) {
 		/* Just create a S/G fd based on the skb */
 		err = skb_to_sg_fd(priv, skb, &fd);
 		percpu_priv->tx_frag_skbuffs++;
@@ -1128,39 +1178,62 @@ int __hot dpa_tx_extended(struct sk_buff
 
 			dev_kfree_skb(skb);
 			skb = skb_new;
+			skb_changed = true;
 		}
 
 		/* We're going to store the skb backpointer at the beginning
 		 * of the data buffer, so we need a privately owned skb
+		 *
+		 * Under the A010022 errata, we are going to have a privately
+		 * owned skb after realigning the current one, so no point in
+		 * copying it here in that case.
 		 */
 
 		/* Code borrowed from skb_unshare(). */
-		if (skb_cloned(skb)) {
+		if (skb_cloned(skb) && !skb_need_wa) {
 			nskb = skb_copy(skb, GFP_ATOMIC);
 			kfree_skb(skb);
 			skb = nskb;
-#ifndef CONFIG_PPC
-			if (unlikely(dpaa_errata_a010022) &&
-			    a010022_check_skb(skb, priv)) {
-				nskb = a010022_realign_skb(skb, priv);
-				if (!nskb)
-					goto skb_to_fd_failed;
-				dev_kfree_skb(skb);
-				skb = nskb;
-			}
-#endif
+			skb_changed = true;
+
 			/* skb_copy() has now linearized the skbuff. */
-		} else if (unlikely(nonlinear)) {
+		} else if (unlikely(nonlinear) && !skb_need_wa) {
 			/* We are here because the egress skb contains
 			 * more fragments than we support. In this case,
 			 * we have no choice but to linearize it ourselves.
 			 */
-			err = __skb_linearize(skb);
+#ifndef CONFIG_PPC
+			/* No point in linearizing the skb now if we are going
+			 * to realign and linearize it again further down due
+			 * to the A010022 errata
+			 */
+			if (unlikely(dpaa_errata_a010022))
+				skb_need_wa = true;
+			else
+#endif
+				err = __skb_linearize(skb);
 		}
 		if (unlikely(!skb || err < 0))
 			/* Common out-of-memory error path */
 			goto enomem;
 
+#ifndef CONFIG_PPC
+		/* Verify the skb a second time if it has been updated since
+		 * the previous check
+		 */
+		if (unlikely(dpaa_errata_a010022) && skb_changed &&
+		    a010022_check_skb(skb, priv))
+			skb_need_wa = true;
+
+		if (unlikely(dpaa_errata_a010022) && skb_need_wa) {
+			nskb = a010022_realign_skb(skb, priv);
+			if (!nskb)
+				goto skb_to_fd_failed;
+			dev_kfree_skb(skb);
+			skb = nskb;
+		}
+#endif
+
 		err = skb_to_contig_fd(priv, skb, &fd, countptr, &offset);
 	}
 	if (unlikely(err < 0))