@@ -244,9 +244,8 @@ void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
244244 * only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq.
245245 */
246246void
247- netmap_generic_irq (struct ifnet * ifp , u_int q , u_int * work_done )
247+ netmap_generic_irq (struct netmap_adapter * na , u_int q , u_int * work_done )
248248{
249- struct netmap_adapter * na = NA (ifp );
250249 if (unlikely (!nm_netmap_on (na )))
251250 return ;
252251
@@ -303,6 +302,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
303302 */
304303 for (r = 0 ; r < na -> num_tx_rings ; r ++ )
305304 na -> tx_rings [r ].tx_pool = NULL ;
305+
306306 for (r = 0 ; r < na -> num_tx_rings ; r ++ ) {
307307 na -> tx_rings [r ].tx_pool = malloc (na -> num_tx_desc * sizeof (struct mbuf * ),
308308 M_DEVBUF , M_NOWAIT | M_ZERO );
@@ -322,6 +322,9 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
322322 }
323323 na -> tx_rings [r ].tx_pool [i ] = m ;
324324 }
325+ na -> tx_rings [r ].tx_event = NULL ;
326+ mtx_init (& na -> tx_rings [r ].tx_event_lock ,
327+ "tx_event_lock" , NULL , MTX_SPIN );
325328 }
326329
327330 rtnl_lock ();
@@ -387,18 +390,23 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
387390 /* Decrement reference counter for the mbufs in the
388391 * TX pools. These mbufs can be still pending in drivers,
389392 * (e.g. this happens with virtio-net driver, which
390- * does lazy reclaiming of transmitted mbufs).
391- * Therefore it is necessary to remove the
392- * destructor (which invokes netmap code), since the
393- * the netmap module may disappear before pending mbufs
394- * are consumed. */
393+ * does lazy reclaiming of transmitted mbufs). */
395394 for (r = 0 ; r < na -> num_tx_rings ; r ++ ) {
395+ /* We must remove the destructor on the TX event,
396+ * because the destructor invokes netmap code, and
397+ * the netmap module may disappear before the
398+ * TX event is consumed. */
399+ mtx_lock (& na -> tx_rings [r ].tx_event_lock );
400+ m = na -> tx_rings [r ].tx_event ;
401+ if (m ) {
402+ SET_MBUF_DESTRUCTOR (m , NULL );
403+ }
404+ na -> tx_rings [r ].tx_event = NULL ;
405+ mtx_unlock (& na -> tx_rings [r ].tx_event_lock );
406+ mtx_destroy (& na -> tx_rings [r ].tx_event_lock );
407+
396408 for (i = 0 ; i < na -> num_tx_desc ; i ++ ) {
397- m = na -> tx_rings [r ].tx_pool [i ];
398- if (m ) {
399- SET_MBUF_DESTRUCTOR (m , NULL );
400- m_freem (m );
401- }
409+ m_freem (na -> tx_rings [r ].tx_pool [i ]);
402410 }
403411 free (na -> tx_rings [r ].tx_pool , M_DEVBUF );
404412 }
@@ -452,10 +460,34 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
452460static void
453461generic_mbuf_destructor (struct mbuf * m )
454462{
455- netmap_generic_irq (MBUF_IFP (m ), MBUF_TXQ (m ), NULL );
463+ struct netmap_adapter * na = NA (MBUF_IFP (m ));
464+ struct netmap_kring * kring ;
465+ unsigned int r = MBUF_TXQ (m );
466+
467+ if (unlikely (!nm_netmap_on (na ) || r >= na -> num_tx_rings )) {
468+ /* Unfortunately, certain drivers (like the linux bridge
469+ * driver, or the linux veth driver) change the MBUF_IFP(m)
470+ * that was set by generic_xmit_frame, so that here we end
471+ * up with a NULL na, or a different na.
472+ */
473+ RD (3 , "Warning: driver modified MBUF_IFP(m)" );
474+ return ;
475+ }
476+
477+ /* First, clear the event. */
478+ kring = & na -> tx_rings [r ];
479+ mtx_lock (& kring -> tx_event_lock );
480+ kring -> tx_event = NULL ;
481+ mtx_unlock (& kring -> tx_event_lock );
482+
483+ /* Second, wake up clients, that will reclaim
484+ * the event through txsync. */
485+ netmap_generic_irq (na , r , NULL );
456486#ifdef __FreeBSD__
457- if (netmap_verbose )
458- RD (5 , "Tx irq (%p) queue %d index %d" , m , MBUF_TXQ (m ), (int )(uintptr_t )m -> m_ext .ext_arg1 );
487+ if (netmap_verbose ) {
488+ RD (5 , "Tx irq (%p) queue %d index %d" , m , r ,
489+ (int )(uintptr_t )m -> m_ext .ext_arg1 );
490+ }
459491 netmap_default_mbuf_destructor (m );
460492#endif /* __FreeBSD__ */
461493}
@@ -496,11 +528,21 @@ generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc)
496528
497529 } else {
498530 if (unlikely (m == NULL )) {
499- /* this is done, try to replenish the entry */
500- replenish = 1 ;
531+ /* This slot has been used to place an event. */
532+ mtx_lock (& kring -> tx_event_lock );
533+ replenish = (kring -> tx_event == NULL );
534+ mtx_unlock (& kring -> tx_event_lock );
535+ if (!replenish ) {
536+ /* The event has not been consumed yet,
537+ * still busy in the driver. */
538+ break ;
539+ }
540+ /* The event has been consumed, we have to
541+ * replenish it and go ahead. */
501542
502543 } else if (MBUF_REFCNT (m ) != 1 ) {
503- break ; /* This mbuf is still busy: its refcnt is 2. */
544+ /* This mbuf is still busy: its refcnt is 2. */
545+ break ;
504546 }
505547 }
506548
@@ -541,9 +583,9 @@ generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc)
541583 return n ;
542584}
543585
544-
586+ /* Compute a slot index in the middle between inf and sup. */
545587static inline u_int
546- generic_tx_event_middle (u_int inf , u_int sup , u_int lim )
588+ ring_middle (u_int inf , u_int sup , u_int lim )
547589{
548590 u_int n = lim + 1 ;
549591 u_int e ;
@@ -565,42 +607,59 @@ generic_tx_event_middle(u_int inf, u_int sup, u_int lim)
565607 return e ;
566608}
567609
568- /*
569- * We have pending packets in the driver between nr_hwtail+1 and hwcur.
570- * Schedule a notification approximately in the middle of the two.
571- * There is a race but this is only called within txsync which does
572- * a double check.
573- */
574610static void
575611generic_set_tx_event (struct netmap_kring * kring , u_int hwcur )
576612{
577613 u_int lim = kring -> nkr_num_slots - 1 ;
578614 struct mbuf * m ;
579615 u_int e ;
616+ u_int ntc = nm_next (kring -> nr_hwtail , lim ); /* next to clean */
580617
581- if (nm_next ( kring -> nr_hwtail , lim ) == hwcur ) {
618+ if (ntc == hwcur ) {
582619 return ; /* all buffers are free */
583620 }
584621
585622 /*
586- * We have pending packets in the driver between hwtail+1 and hwcur.
587- * Compute a position in the middle, to be used to generate
588- * a notification.
623+ * We have pending packets in the driver between hwtail+1
624+ * and hwcur, and we have to chose one of these slot to
625+ * generate a notification.
626+ * There is a race but this is only called within txsync which
627+ * does a double check.
589628 */
590- e = generic_tx_event_middle (nm_next (kring -> nr_hwtail , lim ), hwcur , lim );
629+ #if 0
630+ /* Choose a slot in the middle, so that we don't risk ending
631+ * up in a situation where the client continuously wake up,
632+ * fills one or a few TX slots and go to sleep again. */
633+ e = ring_middle (ntc , hwcur , lim );
634+ #else
635+ /* Choose the first pending slot, to be safe against driver
636+ * reordering mbuf transmissions. */
637+ e = ntc ;
638+ #endif
591639
592640 m = kring -> tx_pool [e ];
593- ND (5 , "Request Event at %d mbuf %p refcnt %d" , e , m , m ? MBUF_REFCNT (m ) : -2 );
594641 if (m == NULL ) {
595- /* This can happen if there is already an event on the netmap
596- slot 'e': There is nothing to do. */
642+ /* An event is already in place. */
597643 return ;
598644 }
599- kring -> tx_pool [e ] = NULL ;
645+
646+ mtx_lock (& kring -> tx_event_lock );
647+ if (kring -> tx_event ) {
648+ /* An event is already in place. */
649+ mtx_unlock (& kring -> tx_event_lock );
650+ return ;
651+ }
652+
600653 SET_MBUF_DESTRUCTOR (m , generic_mbuf_destructor );
654+ kring -> tx_event = m ;
655+ mtx_unlock (& kring -> tx_event_lock );
656+
657+ kring -> tx_pool [e ] = NULL ;
658+
659+ ND (5 , "Request Event at %d mbuf %p refcnt %d" , e , m , m ? MBUF_REFCNT (m ) : -2 );
601660
602- // XXX wmb() ?
603- /* Decrement the refcount an free it if we have the last one . */
661+ /* Decrement the refcount. This will free it if we lose the race
662+ * with the driver . */
604663 m_freem (m );
605664 smp_mb ();
606665}
@@ -643,7 +702,7 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
643702 /* In txqdisc mode, we ask for a delayed notification,
644703 * but only when cur == hwtail, which means that the
645704 * client is going to block. */
646- event = generic_tx_event_middle (nm_i , head , lim );
705+ event = ring_middle (nm_i , head , lim );
647706 ND (3 , "Place txqdisc event (hwcur=%u,event=%u,"
648707 "head=%u,hwtail=%u)" , nm_i , event , head ,
649708 kring -> nr_hwtail );
@@ -795,7 +854,7 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
795854
796855 if (netmap_generic_mit < 32768 ) {
797856 /* no rx mitigation, pass notification up */
798- netmap_generic_irq (na -> ifp , rr , & work_done );
857+ netmap_generic_irq (na , rr , & work_done );
799858 } else {
800859 /* same as send combining, filter notification if there is a
801860 * pending timer, otherwise pass it up and start a timer.
@@ -804,7 +863,7 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
804863 /* Record that there is some pending work. */
805864 gna -> mit [rr ].mit_pending = 1 ;
806865 } else {
807- netmap_generic_irq (na -> ifp , rr , & work_done );
866+ netmap_generic_irq (na , rr , & work_done );
808867 nm_os_mitigation_start (& gna -> mit [rr ]);
809868 }
810869 }
0 commit comments