7f83738b46
Patches 0001 ~ 0014 are for the virtual channel and PMD; 0015 is the iavf fdir framework; 0016 ~ 0017 are for the iavf fdir driver. Type: feature Signed-off-by: Chenmin Sun <chenmin.sun@intel.com> Change-Id: I38e69ca0065a71cc6ba0b44ef7c7db51193a0899
134 lines
4.5 KiB
Diff
134 lines
4.5 KiB
Diff
From d338aa7cb45638b3a14177a8d83ef01c4ec20d1b Mon Sep 17 00:00:00 2001
|
|
From: Leyi Rong <leyi.rong@intel.com>
|
|
Date: Wed, 8 Apr 2020 14:22:09 +0800
|
|
Subject: [DPDK 14/17] net/iavf: add RSS hash parsing in AVX path
|
|
|
|
Support RSS hash parsing from Flex Rx
|
|
descriptor in AVX data path.
|
|
|
|
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
|
|
---
|
|
drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
|
|
1 file changed, 90 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
|
|
index 3bf5833fa..22f1b7887 100644
|
|
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
|
|
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
|
|
@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
|
|
_mm256_set_epi8
|
|
(/* first descriptor */
|
|
0xFF, 0xFF,
|
|
- 0xFF, 0xFF, /* rss not supported */
|
|
+ 0xFF, 0xFF, /* rss hash parsed separately */
|
|
11, 10, /* octet 10~11, 16 bits vlan_macip */
|
|
5, 4, /* octet 4~5, 16 bits data_len */
|
|
0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */
|
|
@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
|
|
0xFF, 0xFF, /*pkt_type set as unknown */
|
|
/* second descriptor */
|
|
0xFF, 0xFF,
|
|
- 0xFF, 0xFF, /* rss not supported */
|
|
+ 0xFF, 0xFF, /* rss hash parsed separately */
|
|
11, 10, /* octet 10~11, 16 bits vlan_macip */
|
|
5, 4, /* octet 4~5, 16 bits data_len */
|
|
0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */
|
|
@@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
|
|
_mm256_extract_epi32(fdir_id0_7, 4);
|
|
} /* if() on fdir_enabled */
|
|
|
|
+ /**
|
|
+ * needs to load 2nd 16B of each desc for RSS hash parsing,
|
|
+ * will cause performance drop to get into this context.
|
|
+ */
|
|
+ if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
|
|
+ DEV_RX_OFFLOAD_RSS_HASH) {
|
|
+ /* load bottom half of every 32B desc */
|
|
+ const __m128i raw_desc_bh7 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[7].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh6 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[6].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh5 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[5].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh4 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[4].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh3 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[3].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh2 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[2].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh1 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[1].wb.status_error1));
|
|
+ rte_compiler_barrier();
|
|
+ const __m128i raw_desc_bh0 =
|
|
+ _mm_load_si128
|
|
+ ((void *)(&rxdp[0].wb.status_error1));
|
|
+
|
|
+ __m256i raw_desc_bh6_7 =
|
|
+ _mm256_inserti128_si256
|
|
+ (_mm256_castsi128_si256(raw_desc_bh6),
|
|
+ raw_desc_bh7, 1);
|
|
+ __m256i raw_desc_bh4_5 =
|
|
+ _mm256_inserti128_si256
|
|
+ (_mm256_castsi128_si256(raw_desc_bh4),
|
|
+ raw_desc_bh5, 1);
|
|
+ __m256i raw_desc_bh2_3 =
|
|
+ _mm256_inserti128_si256
|
|
+ (_mm256_castsi128_si256(raw_desc_bh2),
|
|
+ raw_desc_bh3, 1);
|
|
+ __m256i raw_desc_bh0_1 =
|
|
+ _mm256_inserti128_si256
|
|
+ (_mm256_castsi128_si256(raw_desc_bh0),
|
|
+ raw_desc_bh1, 1);
|
|
+
|
|
+ /**
|
|
+ * to shift the 32b RSS hash value to the
|
|
+ * highest 32b of each 128b before mask
|
|
+ */
|
|
+ __m256i rss_hash6_7 =
|
|
+ _mm256_slli_epi64(raw_desc_bh6_7, 32);
|
|
+ __m256i rss_hash4_5 =
|
|
+ _mm256_slli_epi64(raw_desc_bh4_5, 32);
|
|
+ __m256i rss_hash2_3 =
|
|
+ _mm256_slli_epi64(raw_desc_bh2_3, 32);
|
|
+ __m256i rss_hash0_1 =
|
|
+ _mm256_slli_epi64(raw_desc_bh0_1, 32);
|
|
+
|
|
+ __m256i rss_hash_msk =
|
|
+ _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
|
|
+ 0xFFFFFFFF, 0, 0, 0);
|
|
+
|
|
+ rss_hash6_7 = _mm256_and_si256
|
|
+ (rss_hash6_7, rss_hash_msk);
|
|
+ rss_hash4_5 = _mm256_and_si256
|
|
+ (rss_hash4_5, rss_hash_msk);
|
|
+ rss_hash2_3 = _mm256_and_si256
|
|
+ (rss_hash2_3, rss_hash_msk);
|
|
+ rss_hash0_1 = _mm256_and_si256
|
|
+ (rss_hash0_1, rss_hash_msk);
|
|
+
|
|
+ mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
|
|
+ mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
|
|
+ mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
|
|
+ mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
|
|
+ } /* if() on RSS hash parsing */
|
|
+
|
|
/**
|
|
* At this point, we have the 8 sets of flags in the low 16-bits
|
|
* of each 32-bit value in vlan0.
|
|
--
|
|
2.17.1
|
|
|