About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Documentation / mic / mpssd / mpssd.c




Custom Search

Based on kernel version 3.16. Page generated on 2014-08-06 21:40 EST.

1	/*
2	 * Intel MIC Platform Software Stack (MPSS)
3	 *
4	 * Copyright(c) 2013 Intel Corporation.
5	 *
6	 * This program is free software; you can redistribute it and/or modify
7	 * it under the terms of the GNU General Public License, version 2, as
8	 * published by the Free Software Foundation.
9	 *
10	 * This program is distributed in the hope that it will be useful, but
11	 * WITHOUT ANY WARRANTY; without even the implied warranty of
12	 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	 * General Public License for more details.
14	 *
15	 * The full GNU General Public License is included in this distribution in
16	 * the file called "COPYING".
17	 *
18	 * Intel MIC User Space Tools.
19	 */
20	
21	#define _GNU_SOURCE
22	
23	#include <stdlib.h>
24	#include <fcntl.h>
25	#include <getopt.h>
26	#include <assert.h>
27	#include <unistd.h>
28	#include <stdbool.h>
29	#include <signal.h>
30	#include <poll.h>
31	#include <features.h>
32	#include <sys/types.h>
33	#include <sys/stat.h>
34	#include <sys/mman.h>
35	#include <sys/socket.h>
36	#include <linux/virtio_ring.h>
37	#include <linux/virtio_net.h>
38	#include <linux/virtio_console.h>
39	#include <linux/virtio_blk.h>
40	#include <linux/version.h>
41	#include "mpssd.h"
42	#include <linux/mic_ioctl.h>
43	#include <linux/mic_common.h>
44	
/* Forward declaration; the definition is not in this part of the file. */
static void init_mic(struct mic_info *mic);

/* Log stream. NOTE(review): presumably the sink used by mpsslog() - confirm. */
static FILE *logfp;
/* NOTE(review): looks like the anchor of the per-card mic_info list - confirm. */
static struct mic_info mic_list;

/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
51	
/*
 * min_t - smaller of x and y after both have been converted to 'type'.
 * Built on a GNU statement expression so that each argument is evaluated
 * exactly once.
 */
#define min_t(type, x, y) ({				\
		type __min_a = (x);			\
		type __min_b = (y);			\
		__min_a < __min_b ? __min_a : __min_b; })
56	
/*
 * Alignment helpers. The mask arithmetic is only meaningful when 'size'
 * is a power of two. Every macro argument is parenthesized so that
 * compound expressions such as (1 << n) expand correctly.
 */
/* align addr on a size boundary - adjust address down if needed */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN((addr), PAGE_SIZE)

/*
 * Force exactly one access to x through a volatile-qualified lvalue.
 * __typeof__ is used (rather than typeof) so the macro also works when
 * the compiler runs in a strict ISO mode.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))
68	
/*
 * When non-zero, the virtio net device page advertises checksum/GSO
 * features and tun_alloc() requests the matching TAP offloads.
 */
#define GSO_ENABLED		1
/* Payload size used to dimension the network packet buffers */
#define MAX_GSO_SIZE		(64 * 1024)
/* NOTE(review): presumably the Ethernet header length in bytes - confirm */
#define ETH_H_LEN		14
/* Per-packet buffer size: payload plus header, rounded up to 64 bytes */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset of the first vring inside the mapping created by init_vr() */
#define MIC_DEVICE_PAGE_END	0x1000

/* Fallback for headers that do not provide this flag */
#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
78	
79	static struct {
80		struct mic_device_desc dd;
81		struct mic_vqconfig vqconfig[2];
82		__u32 host_features, guest_acknowledgements;
83		struct virtio_console_config cons_config;
84	} virtcons_dev_page = {
85		.dd = {
86			.type = VIRTIO_ID_CONSOLE,
87			.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
88			.feature_len = sizeof(virtcons_dev_page.host_features),
89			.config_len = sizeof(virtcons_dev_page.cons_config),
90		},
91		.vqconfig[0] = {
92			.num = htole16(MIC_VRING_ENTRIES),
93		},
94		.vqconfig[1] = {
95			.num = htole16(MIC_VRING_ENTRIES),
96		},
97	};
98	
99	static struct {
100		struct mic_device_desc dd;
101		struct mic_vqconfig vqconfig[2];
102		__u32 host_features, guest_acknowledgements;
103		struct virtio_net_config net_config;
104	} virtnet_dev_page = {
105		.dd = {
106			.type = VIRTIO_ID_NET,
107			.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
108			.feature_len = sizeof(virtnet_dev_page.host_features),
109			.config_len = sizeof(virtnet_dev_page.net_config),
110		},
111		.vqconfig[0] = {
112			.num = htole16(MIC_VRING_ENTRIES),
113		},
114		.vqconfig[1] = {
115			.num = htole16(MIC_VRING_ENTRIES),
116		},
117	#if GSO_ENABLED
118			.host_features = htole32(
119			1 << VIRTIO_NET_F_CSUM |
120			1 << VIRTIO_NET_F_GSO |
121			1 << VIRTIO_NET_F_GUEST_TSO4 |
122			1 << VIRTIO_NET_F_GUEST_TSO6 |
123			1 << VIRTIO_NET_F_GUEST_ECN |
124			1 << VIRTIO_NET_F_GUEST_UFO),
125	#else
126			.host_features = 0,
127	#endif
128	};
129	
130	static const char *mic_config_dir = "/etc/sysconfig/mic";
131	static const char *virtblk_backend = "VIRTBLK_BACKEND";
132	static struct {
133		struct mic_device_desc dd;
134		struct mic_vqconfig vqconfig[1];
135		__u32 host_features, guest_acknowledgements;
136		struct virtio_blk_config blk_config;
137	} virtblk_dev_page = {
138		.dd = {
139			.type = VIRTIO_ID_BLOCK,
140			.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141			.feature_len = sizeof(virtblk_dev_page.host_features),
142			.config_len = sizeof(virtblk_dev_page.blk_config),
143		},
144		.vqconfig[0] = {
145			.num = htole16(MIC_VRING_ENTRIES),
146		},
147		.host_features =
148			htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149		.blk_config = {
150			.seg_max = htole32(MIC_VRING_ENTRIES - 2),
151			.capacity = htole64(0),
152		 }
153	};
154	
/* NOTE(review): presumably the program name; neither set nor read in this part of the file. */
static char *myname;
156	
157	static int
158	tap_configure(struct mic_info *mic, char *dev)
159	{
160		pid_t pid;
161		char *ifargv[7];
162		char ipaddr[IFNAMSIZ];
163		int ret = 0;
164	
165		pid = fork();
166		if (pid == 0) {
167			ifargv[0] = "ip";
168			ifargv[1] = "link";
169			ifargv[2] = "set";
170			ifargv[3] = dev;
171			ifargv[4] = "up";
172			ifargv[5] = NULL;
173			mpsslog("Configuring %s\n", dev);
174			ret = execvp("ip", ifargv);
175			if (ret < 0) {
176				mpsslog("%s execvp failed errno %s\n",
177					mic->name, strerror(errno));
178				return ret;
179			}
180		}
181		if (pid < 0) {
182			mpsslog("%s fork failed errno %s\n",
183				mic->name, strerror(errno));
184			return ret;
185		}
186	
187		ret = waitpid(pid, NULL, 0);
188		if (ret < 0) {
189			mpsslog("%s waitpid failed errno %s\n",
190				mic->name, strerror(errno));
191			return ret;
192		}
193	
194		snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
195	
196		pid = fork();
197		if (pid == 0) {
198			ifargv[0] = "ip";
199			ifargv[1] = "addr";
200			ifargv[2] = "add";
201			ifargv[3] = ipaddr;
202			ifargv[4] = "dev";
203			ifargv[5] = dev;
204			ifargv[6] = NULL;
205			mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206			ret = execvp("ip", ifargv);
207			if (ret < 0) {
208				mpsslog("%s execvp failed errno %s\n",
209					mic->name, strerror(errno));
210				return ret;
211			}
212		}
213		if (pid < 0) {
214			mpsslog("%s fork failed errno %s\n",
215				mic->name, strerror(errno));
216			return ret;
217		}
218	
219		ret = waitpid(pid, NULL, 0);
220		if (ret < 0) {
221			mpsslog("%s waitpid failed errno %s\n",
222				mic->name, strerror(errno));
223			return ret;
224		}
225		mpsslog("MIC name %s %s %d DONE!\n",
226			mic->name, __func__, __LINE__);
227		return 0;
228	}
229	
230	static int tun_alloc(struct mic_info *mic, char *dev)
231	{
232		struct ifreq ifr;
233		int fd, err;
234	#if GSO_ENABLED
235		unsigned offload;
236	#endif
237		fd = open("/dev/net/tun", O_RDWR);
238		if (fd < 0) {
239			mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240			goto done;
241		}
242	
243		memset(&ifr, 0, sizeof(ifr));
244	
245		ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246		if (*dev)
247			strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248	
249		err = ioctl(fd, TUNSETIFF, (void *)&ifr);
250		if (err < 0) {
251			mpsslog("%s %s %d TUNSETIFF failed %s\n",
252				mic->name, __func__, __LINE__, strerror(errno));
253			close(fd);
254			return err;
255		}
256	#if GSO_ENABLED
257		offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
258			TUN_F_TSO_ECN | TUN_F_UFO;
259	
260		err = ioctl(fd, TUNSETOFFLOAD, offload);
261		if (err < 0) {
262			mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
263				mic->name, __func__, __LINE__, strerror(errno));
264			close(fd);
265			return err;
266		}
267	#endif
268		strcpy(dev, ifr.ifr_name);
269		mpsslog("Created TAP %s\n", dev);
270	done:
271		return fd;
272	}
273	
/* Indices into the pollfd array used by virtio_net() */
#define NET_FD_VIRTIO_NET 0
#define NET_FD_TUN 1
/* Number of entries in that pollfd array */
#define MAX_NET_FD 2
277	
278	static void set_dp(struct mic_info *mic, int type, void *dp)
279	{
280		switch (type) {
281		case VIRTIO_ID_CONSOLE:
282			mic->mic_console.console_dp = dp;
283			return;
284		case VIRTIO_ID_NET:
285			mic->mic_net.net_dp = dp;
286			return;
287		case VIRTIO_ID_BLOCK:
288			mic->mic_virtblk.block_dp = dp;
289			return;
290		}
291		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
292		assert(0);
293	}
294	
295	static void *get_dp(struct mic_info *mic, int type)
296	{
297		switch (type) {
298		case VIRTIO_ID_CONSOLE:
299			return mic->mic_console.console_dp;
300		case VIRTIO_ID_NET:
301			return mic->mic_net.net_dp;
302		case VIRTIO_ID_BLOCK:
303			return mic->mic_virtblk.block_dp;
304		}
305		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
306		assert(0);
307		return NULL;
308	}
309	
310	static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
311	{
312		struct mic_device_desc *d;
313		int i;
314		void *dp = get_dp(mic, type);
315	
316		for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
317			i += mic_total_desc_size(d)) {
318			d = dp + i;
319	
320			/* End of list */
321			if (d->type == 0)
322				break;
323	
324			if (d->type == -1)
325				continue;
326	
327			mpsslog("%s %s d-> type %d d %p\n",
328				mic->name, __func__, d->type, d);
329	
330			if (d->type == (__u8)type)
331				return d;
332		}
333		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
334		assert(0);
335		return NULL;
336	}
337	
338	/* See comments in vhost.c for explanation of next_desc() */
339	static unsigned next_desc(struct vring_desc *desc)
340	{
341		unsigned int next;
342	
343		if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
344			return -1U;
345		next = le16toh(desc->next);
346		return next;
347	}
348	
349	/* Sum up all the IOVEC length */
350	static ssize_t
351	sum_iovec_len(struct mic_copy_desc *copy)
352	{
353		ssize_t sum = 0;
354		int i;
355	
356		for (i = 0; i < copy->iovcnt; i++)
357			sum += copy->iov[i].iov_len;
358		return sum;
359	}
360	
361	static inline void verify_out_len(struct mic_info *mic,
362		struct mic_copy_desc *copy)
363	{
364		if (copy->out_len != sum_iovec_len(copy)) {
365			mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
366				mic->name, __func__, __LINE__,
367				copy->out_len, sum_iovec_len(copy));
368			assert(copy->out_len == sum_iovec_len(copy));
369		}
370	}
371	
372	/* Display an iovec */
373	static void
374	disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
375		   const char *s, int line)
376	{
377		int i;
378	
379		for (i = 0; i < copy->iovcnt; i++)
380			mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
381				mic->name, s, line, i,
382				copy->iov[i].iov_base, copy->iov[i].iov_len);
383	}
384	
385	static inline __u16 read_avail_idx(struct mic_vring *vr)
386	{
387		return ACCESS_ONCE(vr->info->avail_idx);
388	}
389	
390	static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
391					struct mic_copy_desc *copy, ssize_t len)
392	{
393		copy->vr_idx = tx ? 0 : 1;
394		copy->update_used = true;
395		if (type == VIRTIO_ID_NET)
396			copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
397		else
398			copy->iov[0].iov_len = len;
399	}
400	
401	/* Central API which triggers the copies */
402	static int
403	mic_virtio_copy(struct mic_info *mic, int fd,
404			struct mic_vring *vr, struct mic_copy_desc *copy)
405	{
406		int ret;
407	
408		ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
409		if (ret) {
410			mpsslog("%s %s %d errno %s ret %d\n",
411				mic->name, __func__, __LINE__,
412				strerror(errno), ret);
413		}
414		return ret;
415	}
416	
417	/*
418	 * This initialization routine requires at least one
419	 * vring i.e. vr0. vr1 is optional.
420	 */
421	static void *
422	init_vr(struct mic_info *mic, int fd, int type,
423		struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
424	{
425		int vr_size;
426		char *va;
427	
428		vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
429			MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
430		va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
431			PROT_READ, MAP_SHARED, fd, 0);
432		if (MAP_FAILED == va) {
433			mpsslog("%s %s %d mmap failed errno %s\n",
434				mic->name, __func__, __LINE__,
435				strerror(errno));
436			goto done;
437		}
438		set_dp(mic, type, va);
439		vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
440		vr0->info = vr0->va +
441			vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
442		vring_init(&vr0->vr,
443			   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
444		mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
445			__func__, mic->name, vr0->va, vr0->info, vr_size,
446			vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
447		mpsslog("magic 0x%x expected 0x%x\n",
448			le32toh(vr0->info->magic), MIC_MAGIC + type);
449		assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
450		if (vr1) {
451			vr1->va = (struct mic_vring *)
452				&va[MIC_DEVICE_PAGE_END + vr_size];
453			vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
454				MIC_VIRTIO_RING_ALIGN);
455			vring_init(&vr1->vr,
456				   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
457			mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
458				__func__, mic->name, vr1->va, vr1->info, vr_size,
459				vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
460			mpsslog("magic 0x%x expected 0x%x\n",
461				le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
462			assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
463		}
464	done:
465		return va;
466	}
467	
468	static void
469	wait_for_card_driver(struct mic_info *mic, int fd, int type)
470	{
471		struct pollfd pollfd;
472		int err;
473		struct mic_device_desc *desc = get_device_desc(mic, type);
474	
475		pollfd.fd = fd;
476		mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
477			mic->name, __func__, type, desc->status);
478		while (1) {
479			pollfd.events = POLLIN;
480			pollfd.revents = 0;
481			err = poll(&pollfd, 1, -1);
482			if (err < 0) {
483				mpsslog("%s %s poll failed %s\n",
484					mic->name, __func__, strerror(errno));
485				continue;
486			}
487	
488			if (pollfd.revents) {
489				mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
490					mic->name, __func__, type, desc->status);
491				if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
492					mpsslog("%s %s poll.revents %d\n",
493						mic->name, __func__, pollfd.revents);
494					mpsslog("%s %s desc-> type %d status 0x%x\n",
495						mic->name, __func__, type,
496						desc->status);
497					break;
498				}
499			}
500		}
501	}
502	
503	/* Spin till we have some descriptors */
504	static void
505	spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
506	{
507		__u16 avail_idx = read_avail_idx(vr);
508	
509		while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
510	#ifdef DEBUG
511			mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
512				mic->name, __func__,
513				le16toh(vr->vr.avail->idx), vr->info->avail_idx);
514	#endif
515			sched_yield();
516		}
517	}
518	
519	static void *
520	virtio_net(void *arg)
521	{
522		static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
523		static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
524		struct iovec vnet_iov[2][2] = {
525			{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
526			  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
527			{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
528			  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
529		};
530		struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
531		struct mic_info *mic = (struct mic_info *)arg;
532		char if_name[IFNAMSIZ];
533		struct pollfd net_poll[MAX_NET_FD];
534		struct mic_vring tx_vr, rx_vr;
535		struct mic_copy_desc copy;
536		struct mic_device_desc *desc;
537		int err;
538	
539		snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
540		mic->mic_net.tap_fd = tun_alloc(mic, if_name);
541		if (mic->mic_net.tap_fd < 0)
542			goto done;
543	
544		if (tap_configure(mic, if_name))
545			goto done;
546		mpsslog("MIC name %s id %d\n", mic->name, mic->id);
547	
548		net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
549		net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
550		net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
551		net_poll[NET_FD_TUN].events = POLLIN;
552	
553		if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
554					  VIRTIO_ID_NET, &tx_vr, &rx_vr,
555			virtnet_dev_page.dd.num_vq)) {
556			mpsslog("%s init_vr failed %s\n",
557				mic->name, strerror(errno));
558			goto done;
559		}
560	
561		copy.iovcnt = 2;
562		desc = get_device_desc(mic, VIRTIO_ID_NET);
563	
564		while (1) {
565			ssize_t len;
566	
567			net_poll[NET_FD_VIRTIO_NET].revents = 0;
568			net_poll[NET_FD_TUN].revents = 0;
569	
570			/* Start polling for data from tap and virtio net */
571			err = poll(net_poll, 2, -1);
572			if (err < 0) {
573				mpsslog("%s poll failed %s\n",
574					__func__, strerror(errno));
575				continue;
576			}
577			if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
578				wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
579						     VIRTIO_ID_NET);
580			/*
581			 * Check if there is data to be read from TUN and write to
582			 * virtio net fd if there is.
583			 */
584			if (net_poll[NET_FD_TUN].revents & POLLIN) {
585				copy.iov = iov0;
586				len = readv(net_poll[NET_FD_TUN].fd,
587					copy.iov, copy.iovcnt);
588				if (len > 0) {
589					struct virtio_net_hdr *hdr
590						= (struct virtio_net_hdr *)vnet_hdr[0];
591	
592					/* Disable checksums on the card since we are on
593					   a reliable PCIe link */
594					hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
595	#ifdef DEBUG
596					mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
597						__func__, __LINE__, hdr->flags);
598					mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
599						copy.out_len, hdr->gso_type);
600	#endif
601	#ifdef DEBUG
602					disp_iovec(mic, copy, __func__, __LINE__);
603					mpsslog("%s %s %d read from tap 0x%lx\n",
604						mic->name, __func__, __LINE__,
605						len);
606	#endif
607					spin_for_descriptors(mic, &tx_vr);
608					txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
609						     len);
610	
611					err = mic_virtio_copy(mic,
612						mic->mic_net.virtio_net_fd, &tx_vr,
613						&copy);
614					if (err < 0) {
615						mpsslog("%s %s %d mic_virtio_copy %s\n",
616							mic->name, __func__, __LINE__,
617							strerror(errno));
618					}
619					if (!err)
620						verify_out_len(mic, &copy);
621	#ifdef DEBUG
622					disp_iovec(mic, copy, __func__, __LINE__);
623					mpsslog("%s %s %d wrote to net 0x%lx\n",
624						mic->name, __func__, __LINE__,
625						sum_iovec_len(&copy));
626	#endif
627					/* Reinitialize IOV for next run */
628					iov0[1].iov_len = MAX_NET_PKT_SIZE;
629				} else if (len < 0) {
630					disp_iovec(mic, &copy, __func__, __LINE__);
631					mpsslog("%s %s %d read failed %s ", mic->name,
632						__func__, __LINE__, strerror(errno));
633					mpsslog("cnt %d sum %zd\n",
634						copy.iovcnt, sum_iovec_len(&copy));
635				}
636			}
637	
638			/*
639			 * Check if there is data to be read from virtio net and
640			 * write to TUN if there is.
641			 */
642			if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
643				while (rx_vr.info->avail_idx !=
644					le16toh(rx_vr.vr.avail->idx)) {
645					copy.iov = iov1;
646					txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
647						     MAX_NET_PKT_SIZE
648						+ sizeof(struct virtio_net_hdr));
649	
650					err = mic_virtio_copy(mic,
651						mic->mic_net.virtio_net_fd, &rx_vr,
652						&copy);
653					if (!err) {
654	#ifdef DEBUG
655						struct virtio_net_hdr *hdr
656							= (struct virtio_net_hdr *)
657								vnet_hdr[1];
658	
659						mpsslog("%s %s %d hdr->flags 0x%x, ",
660							mic->name, __func__, __LINE__,
661							hdr->flags);
662						mpsslog("out_len %d gso_type 0x%x\n",
663							copy.out_len,
664							hdr->gso_type);
665	#endif
666						/* Set the correct output iov_len */
667						iov1[1].iov_len = copy.out_len -
668							sizeof(struct virtio_net_hdr);
669						verify_out_len(mic, &copy);
670	#ifdef DEBUG
671						disp_iovec(mic, copy, __func__,
672							   __LINE__);
673						mpsslog("%s %s %d ",
674							mic->name, __func__, __LINE__);
675						mpsslog("read from net 0x%lx\n",
676							sum_iovec_len(copy));
677	#endif
678						len = writev(net_poll[NET_FD_TUN].fd,
679							copy.iov, copy.iovcnt);
680						if (len != sum_iovec_len(&copy)) {
681							mpsslog("Tun write failed %s ",
682								strerror(errno));
683							mpsslog("len 0x%zx ", len);
684							mpsslog("read_len 0x%zx\n",
685								sum_iovec_len(&copy));
686						} else {
687	#ifdef DEBUG
688							disp_iovec(mic, &copy, __func__,
689								   __LINE__);
690							mpsslog("%s %s %d ",
691								mic->name, __func__,
692								__LINE__);
693							mpsslog("wrote to tap 0x%lx\n",
694								len);
695	#endif
696						}
697					} else {
698						mpsslog("%s %s %d mic_virtio_copy %s\n",
699							mic->name, __func__, __LINE__,
700							strerror(errno));
701						break;
702					}
703				}
704			}
705			if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
706				mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
707		}
708	done:
709		pthread_exit(NULL);
710	}
711	
712	/* virtio_console */
713	#define VIRTIO_CONSOLE_FD 0
714	#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
715	#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
716	#define MAX_BUFFER_SIZE PAGE_SIZE
717	
718	static void *
719	virtio_console(void *arg)
720	{
721		static __u8 vcons_buf[2][PAGE_SIZE];
722		struct iovec vcons_iov[2] = {
723			{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
724			{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
725		};
726		struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
727		struct mic_info *mic = (struct mic_info *)arg;
728		int err;
729		struct pollfd console_poll[MAX_CONSOLE_FD];
730		int pty_fd;
731		char *pts_name;
732		ssize_t len;
733		struct mic_vring tx_vr, rx_vr;
734		struct mic_copy_desc copy;
735		struct mic_device_desc *desc;
736	
737		pty_fd = posix_openpt(O_RDWR);
738		if (pty_fd < 0) {
739			mpsslog("can't open a pseudoterminal master device: %s\n",
740				strerror(errno));
741			goto _return;
742		}
743		pts_name = ptsname(pty_fd);
744		if (pts_name == NULL) {
745			mpsslog("can't get pts name\n");
746			goto _close_pty;
747		}
748		printf("%s console message goes to %s\n", mic->name, pts_name);
749		mpsslog("%s console message goes to %s\n", mic->name, pts_name);
750		err = grantpt(pty_fd);
751		if (err < 0) {
752			mpsslog("can't grant access: %s %s\n",
753				pts_name, strerror(errno));
754			goto _close_pty;
755		}
756		err = unlockpt(pty_fd);
757		if (err < 0) {
758			mpsslog("can't unlock a pseudoterminal: %s %s\n",
759				pts_name, strerror(errno));
760			goto _close_pty;
761		}
762		console_poll[MONITOR_FD].fd = pty_fd;
763		console_poll[MONITOR_FD].events = POLLIN;
764	
765		console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
766		console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
767	
768		if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
769					  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
770			virtcons_dev_page.dd.num_vq)) {
771			mpsslog("%s init_vr failed %s\n",
772				mic->name, strerror(errno));
773			goto _close_pty;
774		}
775	
776		copy.iovcnt = 1;
777		desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
778	
779		for (;;) {
780			console_poll[MONITOR_FD].revents = 0;
781			console_poll[VIRTIO_CONSOLE_FD].revents = 0;
782			err = poll(console_poll, MAX_CONSOLE_FD, -1);
783			if (err < 0) {
784				mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
785					strerror(errno));
786				continue;
787			}
788			if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
789				wait_for_card_driver(mic,
790						     mic->mic_console.virtio_console_fd,
791					VIRTIO_ID_CONSOLE);
792	
793			if (console_poll[MONITOR_FD].revents & POLLIN) {
794				copy.iov = iov0;
795				len = readv(pty_fd, copy.iov, copy.iovcnt);
796				if (len > 0) {
797	#ifdef DEBUG
798					disp_iovec(mic, copy, __func__, __LINE__);
799					mpsslog("%s %s %d read from tap 0x%lx\n",
800						mic->name, __func__, __LINE__,
801						len);
802	#endif
803					spin_for_descriptors(mic, &tx_vr);
804					txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
805						     &copy, len);
806	
807					err = mic_virtio_copy(mic,
808						mic->mic_console.virtio_console_fd,
809						&tx_vr, &copy);
810					if (err < 0) {
811						mpsslog("%s %s %d mic_virtio_copy %s\n",
812							mic->name, __func__, __LINE__,
813							strerror(errno));
814					}
815					if (!err)
816						verify_out_len(mic, &copy);
817	#ifdef DEBUG
818					disp_iovec(mic, copy, __func__, __LINE__);
819					mpsslog("%s %s %d wrote to net 0x%lx\n",
820						mic->name, __func__, __LINE__,
821						sum_iovec_len(copy));
822	#endif
823					/* Reinitialize IOV for next run */
824					iov0->iov_len = PAGE_SIZE;
825				} else if (len < 0) {
826					disp_iovec(mic, &copy, __func__, __LINE__);
827					mpsslog("%s %s %d read failed %s ",
828						mic->name, __func__, __LINE__,
829						strerror(errno));
830					mpsslog("cnt %d sum %zd\n",
831						copy.iovcnt, sum_iovec_len(&copy));
832				}
833			}
834	
835			if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
836				while (rx_vr.info->avail_idx !=
837					le16toh(rx_vr.vr.avail->idx)) {
838					copy.iov = iov1;
839					txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
840						     &copy, PAGE_SIZE);
841	
842					err = mic_virtio_copy(mic,
843						mic->mic_console.virtio_console_fd,
844						&rx_vr, &copy);
845					if (!err) {
846						/* Set the correct output iov_len */
847						iov1->iov_len = copy.out_len;
848						verify_out_len(mic, &copy);
849	#ifdef DEBUG
850						disp_iovec(mic, copy, __func__,
851							   __LINE__);
852						mpsslog("%s %s %d ",
853							mic->name, __func__, __LINE__);
854						mpsslog("read from net 0x%lx\n",
855							sum_iovec_len(copy));
856	#endif
857						len = writev(pty_fd,
858							copy.iov, copy.iovcnt);
859						if (len != sum_iovec_len(&copy)) {
860							mpsslog("Tun write failed %s ",
861								strerror(errno));
862							mpsslog("len 0x%zx ", len);
863							mpsslog("read_len 0x%zx\n",
864								sum_iovec_len(&copy));
865						} else {
866	#ifdef DEBUG
867							disp_iovec(mic, copy, __func__,
868								   __LINE__);
869							mpsslog("%s %s %d ",
870								mic->name, __func__,
871								__LINE__);
872							mpsslog("wrote to tap 0x%lx\n",
873								len);
874	#endif
875						}
876					} else {
877						mpsslog("%s %s %d mic_virtio_copy %s\n",
878							mic->name, __func__, __LINE__,
879							strerror(errno));
880						break;
881					}
882				}
883			}
884			if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
885				mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
886		}
887	_close_pty:
888		close(pty_fd);
889	_return:
890		pthread_exit(NULL);
891	}
892	
893	static void
894	add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
895	{
896		char path[PATH_MAX];
897		int fd, err;
898	
899		snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
900		fd = open(path, O_RDWR);
901		if (fd < 0) {
902			mpsslog("Could not open %s %s\n", path, strerror(errno));
903			return;
904		}
905	
906		err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
907		if (err < 0) {
908			mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
909			close(fd);
910			return;
911		}
912		switch (dd->type) {
913		case VIRTIO_ID_NET:
914			mic->mic_net.virtio_net_fd = fd;
915			mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
916			break;
917		case VIRTIO_ID_CONSOLE:
918			mic->mic_console.virtio_console_fd = fd;
919			mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
920			break;
921		case VIRTIO_ID_BLOCK:
922			mic->mic_virtblk.virtio_block_fd = fd;
923			mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
924			break;
925		}
926	}
927	
928	static bool
929	set_backend_file(struct mic_info *mic)
930	{
931		FILE *config;
932		char buff[PATH_MAX], *line, *evv, *p;
933	
934		snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
935		config = fopen(buff, "r");
936		if (config == NULL)
937			return false;
938		do {  /* look for "virtblk_backend=XXXX" */
939			line = fgets(buff, PATH_MAX, config);
940			if (line == NULL)
941				break;
942			if (*line == '#')
943				continue;
944			p = strchr(line, '\n');
945			if (p)
946				*p = '\0';
947		} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
948		fclose(config);
949		if (line == NULL)
950			return false;
951		evv = strchr(line, '=');
952		if (evv == NULL)
953			return false;
954		mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
955		if (mic->mic_virtblk.backend_file == NULL) {
956			mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
957			return false;
958		}
959		strcpy(mic->mic_virtblk.backend_file, evv + 1);
960		return true;
961	}
962	
963	#define SECTOR_SIZE 512
964	static bool
965	set_backend_size(struct mic_info *mic)
966	{
967		mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
968			SEEK_END);
969		if (mic->mic_virtblk.backend_size < 0) {
970			mpsslog("%s: can't seek: %s\n",
971				mic->name, mic->mic_virtblk.backend_file);
972			return false;
973		}
974		virtblk_dev_page.blk_config.capacity =
975			mic->mic_virtblk.backend_size / SECTOR_SIZE;
976		if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
977			virtblk_dev_page.blk_config.capacity++;
978	
979		virtblk_dev_page.blk_config.capacity =
980			htole64(virtblk_dev_page.blk_config.capacity);
981	
982		return true;
983	}
984	
985	static bool
986	open_backend(struct mic_info *mic)
987	{
988		if (!set_backend_file(mic))
989			goto _error_exit;
990		mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
991		if (mic->mic_virtblk.backend < 0) {
992			mpsslog("%s: can't open: %s\n", mic->name,
993				mic->mic_virtblk.backend_file);
994			goto _error_free;
995		}
996		if (!set_backend_size(mic))
997			goto _error_close;
998		mic->mic_virtblk.backend_addr = mmap(NULL,
999			mic->mic_virtblk.backend_size,
1000			PROT_READ|PROT_WRITE, MAP_SHARED,
1001			mic->mic_virtblk.backend, 0L);
1002		if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1003			mpsslog("%s: can't map: %s %s\n",
1004				mic->name, mic->mic_virtblk.backend_file,
1005				strerror(errno));
1006			goto _error_close;
1007		}
1008		return true;
1009	
1010	 _error_close:
1011		close(mic->mic_virtblk.backend);
1012	 _error_free:
1013		free(mic->mic_virtblk.backend_file);
1014	 _error_exit:
1015		return false;
1016	}
1017	
1018	static void
1019	close_backend(struct mic_info *mic)
1020	{
1021		munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1022		close(mic->mic_virtblk.backend);
1023		free(mic->mic_virtblk.backend_file);
1024	}
1025	
1026	static bool
1027	start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1028	{
1029		if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1030			mpsslog("%s: blk_config is not 8 byte aligned.\n",
1031				mic->name);
1032			return false;
1033		}
1034		add_virtio_device(mic, &virtblk_dev_page.dd);
1035		if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1036					  VIRTIO_ID_BLOCK, vring, NULL,
1037					  virtblk_dev_page.dd.num_vq)) {
1038			mpsslog("%s init_vr failed %s\n",
1039				mic->name, strerror(errno));
1040			return false;
1041		}
1042		return true;
1043	}
1044	
1045	static void
1046	stop_virtblk(struct mic_info *mic)
1047	{
1048		int vr_size, ret;
1049	
1050		vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1051			MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1052		ret = munmap(mic->mic_virtblk.block_dp,
1053			MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1054		if (ret < 0)
1055			mpsslog("%s munmap errno %d\n", mic->name, errno);
1056		close(mic->mic_virtblk.virtio_block_fd);
1057	}
1058	
1059	static __u8
1060	header_error_check(struct vring_desc *desc)
1061	{
1062		if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1063			mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1064				__func__, __LINE__);
1065			return -EIO;
1066		}
1067		if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1068			mpsslog("%s() %d: alone\n",
1069				__func__, __LINE__);
1070			return -EIO;
1071		}
1072		if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1073			mpsslog("%s() %d: not read\n",
1074				__func__, __LINE__);
1075			return -EIO;
1076		}
1077		return 0;
1078	}
1079	
1080	static int
1081	read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1082	{
1083		struct iovec iovec;
1084		struct mic_copy_desc copy;
1085	
1086		iovec.iov_len = sizeof(*hdr);
1087		iovec.iov_base = hdr;
1088		copy.iov = &iovec;
1089		copy.iovcnt = 1;
1090		copy.vr_idx = 0;  /* only one vring on virtio_block */
1091		copy.update_used = false;  /* do not update used index */
1092		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1093	}
1094	
1095	static int
1096	transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1097	{
1098		struct mic_copy_desc copy;
1099	
1100		copy.iov = iovec;
1101		copy.iovcnt = iovcnt;
1102		copy.vr_idx = 0;  /* only one vring on virtio_block */
1103		copy.update_used = false;  /* do not update used index */
1104		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1105	}
1106	
1107	static __u8
1108	status_error_check(struct vring_desc *desc)
1109	{
1110		if (le32toh(desc->len) != sizeof(__u8)) {
1111			mpsslog("%s() %d: length is not sizeof(status)\n",
1112				__func__, __LINE__);
1113			return -EIO;
1114		}
1115		return 0;
1116	}
1117	
1118	static int
1119	write_status(int fd, __u8 *status)
1120	{
1121		struct iovec iovec;
1122		struct mic_copy_desc copy;
1123	
1124		iovec.iov_base = status;
1125		iovec.iov_len = sizeof(*status);
1126		copy.iov = &iovec;
1127		copy.iovcnt = 1;
1128		copy.vr_idx = 0;  /* only one vring on virtio_block */
1129		copy.update_used = true; /* Update used index */
1130		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1131	}
1132	
1133	static void *
1134	virtio_block(void *arg)
1135	{
1136		struct mic_info *mic = (struct mic_info *)arg;
1137		int ret;
1138		struct pollfd block_poll;
1139		struct mic_vring vring;
1140		__u16 avail_idx;
1141		__u32 desc_idx;
1142		struct vring_desc *desc;
1143		struct iovec *iovec, *piov;
1144		__u8 status;
1145		__u32 buffer_desc_idx;
1146		struct virtio_blk_outhdr hdr;
1147		void *fos;
1148	
1149		for (;;) {  /* forever */
1150			if (!open_backend(mic)) { /* No virtblk */
1151				for (mic->mic_virtblk.signaled = 0;
1152					!mic->mic_virtblk.signaled;)
1153					sleep(1);
1154				continue;
1155			}
1156	
1157			/* backend file is specified. */
1158			if (!start_virtblk(mic, &vring))
1159				goto _close_backend;
1160			iovec = malloc(sizeof(*iovec) *
1161				le32toh(virtblk_dev_page.blk_config.seg_max));
1162			if (!iovec) {
1163				mpsslog("%s: can't alloc iovec: %s\n",
1164					mic->name, strerror(ENOMEM));
1165				goto _stop_virtblk;
1166			}
1167	
1168			block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1169			block_poll.events = POLLIN;
1170			for (mic->mic_virtblk.signaled = 0;
1171			     !mic->mic_virtblk.signaled;) {
1172				block_poll.revents = 0;
1173						/* timeout in 1 sec to see signaled */
1174				ret = poll(&block_poll, 1, 1000);
1175				if (ret < 0) {
1176					mpsslog("%s %d: poll failed: %s\n",
1177						__func__, __LINE__,
1178						strerror(errno));
1179					continue;
1180				}
1181	
1182				if (!(block_poll.revents & POLLIN)) {
1183	#ifdef DEBUG
1184					mpsslog("%s %d: block_poll.revents=0x%x\n",
1185						__func__, __LINE__, block_poll.revents);
1186	#endif
1187					continue;
1188				}
1189	
1190				/* POLLIN */
1191				while (vring.info->avail_idx !=
1192					le16toh(vring.vr.avail->idx)) {
1193					/* read header element */
1194					avail_idx =
1195						vring.info->avail_idx &
1196						(vring.vr.num - 1);
1197					desc_idx = le16toh(
1198						vring.vr.avail->ring[avail_idx]);
1199					desc = &vring.vr.desc[desc_idx];
1200	#ifdef DEBUG
1201					mpsslog("%s() %d: avail_idx=%d ",
1202						__func__, __LINE__,
1203						vring.info->avail_idx);
1204					mpsslog("vring.vr.num=%d desc=%p\n",
1205						vring.vr.num, desc);
1206	#endif
1207					status = header_error_check(desc);
1208					ret = read_header(
1209						mic->mic_virtblk.virtio_block_fd,
1210						&hdr, desc_idx);
1211					if (ret < 0) {
1212						mpsslog("%s() %d %s: ret=%d %s\n",
1213							__func__, __LINE__,
1214							mic->name, ret,
1215							strerror(errno));
1216						break;
1217					}
1218					/* buffer element */
1219					piov = iovec;
1220					status = 0;
1221					fos = mic->mic_virtblk.backend_addr +
1222						(hdr.sector * SECTOR_SIZE);
1223					buffer_desc_idx = next_desc(desc);
1224					desc_idx = buffer_desc_idx;
1225					for (desc = &vring.vr.desc[buffer_desc_idx];
1226					     desc->flags & VRING_DESC_F_NEXT;
1227					     desc_idx = next_desc(desc),
1228						     desc = &vring.vr.desc[desc_idx]) {
1229						piov->iov_len = desc->len;
1230						piov->iov_base = fos;
1231						piov++;
1232						fos += desc->len;
1233					}
1234					/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1235					if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1236						VIRTIO_BLK_T_GET_ID)) {
1237						/*
1238						  VIRTIO_BLK_T_IN - does not do
1239						  anything. Probably for documenting.
1240						  VIRTIO_BLK_T_SCSI_CMD - for
1241						  virtio_scsi.
1242						  VIRTIO_BLK_T_FLUSH - turned off in
1243						  config space.
1244						  VIRTIO_BLK_T_BARRIER - defined but not
1245						  used in anywhere.
1246						*/
1247						mpsslog("%s() %d: type %x ",
1248							__func__, __LINE__,
1249							hdr.type);
1250						mpsslog("is not supported\n");
1251						status = -ENOTSUP;
1252	
1253					} else {
1254						ret = transfer_blocks(
1255						mic->mic_virtblk.virtio_block_fd,
1256							iovec,
1257							piov - iovec);
1258						if (ret < 0 &&
1259						    status != 0)
1260							status = ret;
1261					}
1262					/* write status and update used pointer */
1263					if (status != 0)
1264						status = status_error_check(desc);
1265					ret = write_status(
1266						mic->mic_virtblk.virtio_block_fd,
1267						&status);
1268	#ifdef DEBUG
1269					mpsslog("%s() %d: write status=%d on desc=%p\n",
1270						__func__, __LINE__,
1271						status, desc);
1272	#endif
1273				}
1274			}
1275			free(iovec);
1276	_stop_virtblk:
1277			stop_virtblk(mic);
1278	_close_backend:
1279			close_backend(mic);
1280		}  /* forever */
1281	
1282		pthread_exit(NULL);
1283	}
1284	
1285	static void
1286	reset(struct mic_info *mic)
1287	{
1288	#define RESET_TIMEOUT 120
1289		int i = RESET_TIMEOUT;
1290		setsysfs(mic->name, "state", "reset");
1291		while (i) {
1292			char *state;
1293			state = readsysfs(mic->name, "state");
1294			if (!state)
1295				goto retry;
1296			mpsslog("%s: %s %d state %s\n",
1297				mic->name, __func__, __LINE__, state);
1298	
1299			/*
1300			 * If the shutdown was initiated by OSPM, the state stays
1301			 * in "suspended" which is also a valid condition for reset.
1302			 */
1303			if ((!strcmp(state, "offline")) ||
1304			    (!strcmp(state, "suspended"))) {
1305				free(state);
1306				break;
1307			}
1308			free(state);
1309	retry:
1310			sleep(1);
1311			i--;
1312		}
1313	}
1314	
1315	static int
1316	get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1317	{
1318		if (!strcmp(shutdown_status, "nop"))
1319			return MIC_NOP;
1320		if (!strcmp(shutdown_status, "crashed"))
1321			return MIC_CRASHED;
1322		if (!strcmp(shutdown_status, "halted"))
1323			return MIC_HALTED;
1324		if (!strcmp(shutdown_status, "poweroff"))
1325			return MIC_POWER_OFF;
1326		if (!strcmp(shutdown_status, "restart"))
1327			return MIC_RESTART;
1328		mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1329		/* Invalid state */
1330		assert(0);
1331	};
1332	
1333	static int get_mic_state(struct mic_info *mic, char *state)
1334	{
1335		if (!strcmp(state, "offline"))
1336			return MIC_OFFLINE;
1337		if (!strcmp(state, "online"))
1338			return MIC_ONLINE;
1339		if (!strcmp(state, "shutting_down"))
1340			return MIC_SHUTTING_DOWN;
1341		if (!strcmp(state, "reset_failed"))
1342			return MIC_RESET_FAILED;
1343		if (!strcmp(state, "suspending"))
1344			return MIC_SUSPENDING;
1345		if (!strcmp(state, "suspended"))
1346			return MIC_SUSPENDED;
1347		mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1348		/* Invalid state */
1349		assert(0);
1350	};
1351	
1352	static void mic_handle_shutdown(struct mic_info *mic)
1353	{
1354	#define SHUTDOWN_TIMEOUT 60
1355		int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1356		char *shutdown_status;
1357		while (i) {
1358			shutdown_status = readsysfs(mic->name, "shutdown_status");
1359			if (!shutdown_status)
1360				continue;
1361			mpsslog("%s: %s %d shutdown_status %s\n",
1362				mic->name, __func__, __LINE__, shutdown_status);
1363			switch (get_mic_shutdown_status(mic, shutdown_status)) {
1364			case MIC_RESTART:
1365				mic->restart = 1;
1366			case MIC_HALTED:
1367			case MIC_POWER_OFF:
1368			case MIC_CRASHED:
1369				free(shutdown_status);
1370				goto reset;
1371			default:
1372				break;
1373			}
1374			free(shutdown_status);
1375			sleep(1);
1376			i--;
1377		}
1378	reset:
1379		ret = kill(mic->pid, SIGTERM);
1380		mpsslog("%s: %s %d kill pid %d ret %d\n",
1381			mic->name, __func__, __LINE__,
1382			mic->pid, ret);
1383		if (!ret) {
1384			ret = waitpid(mic->pid, &stat,
1385				WIFSIGNALED(stat));
1386			mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1387				mic->name, __func__, __LINE__,
1388				ret, mic->pid);
1389		}
1390		if (ret == mic->pid)
1391			reset(mic);
1392	}
1393	
1394	static void *
1395	mic_config(void *arg)
1396	{
1397		struct mic_info *mic = (struct mic_info *)arg;
1398		char *state = NULL;
1399		char pathname[PATH_MAX];
1400		int fd, ret;
1401		struct pollfd ufds[1];
1402		char value[4096];
1403	
1404		snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1405			 MICSYSFSDIR, mic->name, "state");
1406	
1407		fd = open(pathname, O_RDONLY);
1408		if (fd < 0) {
1409			mpsslog("%s: opening file %s failed %s\n",
1410				mic->name, pathname, strerror(errno));
1411			goto error;
1412		}
1413	
1414		do {
1415			ret = lseek(fd, 0, SEEK_SET);
1416			if (ret < 0) {
1417				mpsslog("%s: Failed to seek to file start '%s': %s\n",
1418					mic->name, pathname, strerror(errno));
1419				goto close_error1;
1420			}
1421			ret = read(fd, value, sizeof(value));
1422			if (ret < 0) {
1423				mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1424					mic->name, pathname, strerror(errno));
1425				goto close_error1;
1426			}
1427	retry:
1428			state = readsysfs(mic->name, "state");
1429			if (!state)
1430				goto retry;
1431			mpsslog("%s: %s %d state %s\n",
1432				mic->name, __func__, __LINE__, state);
1433			switch (get_mic_state(mic, state)) {
1434			case MIC_SHUTTING_DOWN:
1435				mic_handle_shutdown(mic);
1436				goto close_error;
1437			case MIC_SUSPENDING:
1438				mic->boot_on_resume = 1;
1439				setsysfs(mic->name, "state", "suspend");
1440				mic_handle_shutdown(mic);
1441				goto close_error;
1442			case MIC_OFFLINE:
1443				if (mic->boot_on_resume) {
1444					setsysfs(mic->name, "state", "boot");
1445					mic->boot_on_resume = 0;
1446				}
1447				break;
1448			default:
1449				break;
1450			}
1451			free(state);
1452	
1453			ufds[0].fd = fd;
1454			ufds[0].events = POLLERR | POLLPRI;
1455			ret = poll(ufds, 1, -1);
1456			if (ret < 0) {
1457				mpsslog("%s: poll failed %s\n",
1458					mic->name, strerror(errno));
1459				goto close_error1;
1460			}
1461		} while (1);
1462	close_error:
1463		free(state);
1464	close_error1:
1465		close(fd);
1466	error:
1467		init_mic(mic);
1468		pthread_exit(NULL);
1469	}
1470	
1471	static void
1472	set_cmdline(struct mic_info *mic)
1473	{
1474		char buffer[PATH_MAX];
1475		int len;
1476	
1477		len = snprintf(buffer, PATH_MAX,
1478			"clocksource=tsc highres=off nohz=off ");
1479		len += snprintf(buffer + len, PATH_MAX,
1480			"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1481		len += snprintf(buffer + len, PATH_MAX,
1482			"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1483			mic->id);
1484	
1485		setsysfs(mic->name, "cmdline", buffer);
1486		mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1487		snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1488		mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1489	}
1490	
1491	static void
1492	set_log_buf_info(struct mic_info *mic)
1493	{
1494		int fd;
1495		off_t len;
1496		char system_map[] = "/lib/firmware/mic/System.map";
1497		char *map, *temp, log_buf[17] = {'\0'};
1498	
1499		fd = open(system_map, O_RDONLY);
1500		if (fd < 0) {
1501			mpsslog("%s: Opening System.map failed: %d\n",
1502				mic->name, errno);
1503			return;
1504		}
1505		len = lseek(fd, 0, SEEK_END);
1506		if (len < 0) {
1507			mpsslog("%s: Reading System.map size failed: %d\n",
1508				mic->name, errno);
1509			close(fd);
1510			return;
1511		}
1512		map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1513		if (map == MAP_FAILED) {
1514			mpsslog("%s: mmap of System.map failed: %d\n",
1515				mic->name, errno);
1516			close(fd);
1517			return;
1518		}
1519		temp = strstr(map, "__log_buf");
1520		if (!temp) {
1521			mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1522			munmap(map, len);
1523			close(fd);
1524			return;
1525		}
1526		strncpy(log_buf, temp - 19, 16);
1527		setsysfs(mic->name, "log_buf_addr", log_buf);
1528		mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1529		temp = strstr(map, "log_buf_len");
1530		if (!temp) {
1531			mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1532			munmap(map, len);
1533			close(fd);
1534			return;
1535		}
1536		strncpy(log_buf, temp - 19, 16);
1537		setsysfs(mic->name, "log_buf_len", log_buf);
1538		mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1539		munmap(map, len);
1540		close(fd);
1541	}
1542	
1543	static void init_mic(struct mic_info *mic);
1544	
1545	static void
1546	change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1547	{
1548		struct mic_info *mic;
1549	
1550		for (mic = mic_list.next; mic != NULL; mic = mic->next)
1551			mic->mic_virtblk.signaled = 1/* true */;
1552	}
1553	
1554	static void
1555	init_mic(struct mic_info *mic)
1556	{
1557		struct sigaction ignore = {
1558			.sa_flags = 0,
1559			.sa_handler = SIG_IGN
1560		};
1561		struct sigaction act = {
1562			.sa_flags = SA_SIGINFO,
1563			.sa_sigaction = change_virtblk_backend,
1564		};
1565		char buffer[PATH_MAX];
1566		int err;
1567	
1568		/*
1569		 * Currently, one virtio block device is supported for each MIC card
1570		 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1571		 * The signal informs the virtio block backend about a change in the
1572		 * configuration file which specifies the virtio backend file name on
1573		 * the host. Virtio block backend then re-reads the configuration file
1574		 * and switches to the new block device. This signalling mechanism may
1575		 * not be required once multiple virtio block devices are supported by
1576		 * the MIC daemon.
1577		 */
1578		sigaction(SIGUSR1, &ignore, NULL);
1579	
1580		mic->pid = fork();
1581		switch (mic->pid) {
1582		case 0:
1583			set_log_buf_info(mic);
1584			set_cmdline(mic);
1585			add_virtio_device(mic, &virtcons_dev_page.dd);
1586			add_virtio_device(mic, &virtnet_dev_page.dd);
1587			err = pthread_create(&mic->mic_console.console_thread, NULL,
1588				virtio_console, mic);
1589			if (err)
1590				mpsslog("%s virtcons pthread_create failed %s\n",
1591					mic->name, strerror(err));
1592			err = pthread_create(&mic->mic_net.net_thread, NULL,
1593				virtio_net, mic);
1594			if (err)
1595				mpsslog("%s virtnet pthread_create failed %s\n",
1596					mic->name, strerror(err));
1597			err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1598				virtio_block, mic);
1599			if (err)
1600				mpsslog("%s virtblk pthread_create failed %s\n",
1601					mic->name, strerror(err));
1602			sigemptyset(&act.sa_mask);
1603			err = sigaction(SIGUSR1, &act, NULL);
1604			if (err)
1605				mpsslog("%s sigaction SIGUSR1 failed %s\n",
1606					mic->name, strerror(errno));
1607			while (1)
1608				sleep(60);
1609		case -1:
1610			mpsslog("fork failed MIC name %s id %d errno %d\n",
1611				mic->name, mic->id, errno);
1612			break;
1613		default:
1614			if (mic->restart) {
1615				snprintf(buffer, PATH_MAX, "boot");
1616				setsysfs(mic->name, "state", buffer);
1617				mpsslog("%s restarting mic %d\n",
1618					mic->name, mic->restart);
1619				mic->restart = 0;
1620			}
1621			pthread_create(&mic->config_thread, NULL, mic_config, mic);
1622		}
1623	}
1624	
1625	static void
1626	start_daemon(void)
1627	{
1628		struct mic_info *mic;
1629	
1630		for (mic = mic_list.next; mic != NULL; mic = mic->next)
1631			init_mic(mic);
1632	
1633		while (1)
1634			sleep(60);
1635	}
1636	
1637	static int
1638	init_mic_list(void)
1639	{
1640		struct mic_info *mic = &mic_list;
1641		struct dirent *file;
1642		DIR *dp;
1643		int cnt = 0;
1644	
1645		dp = opendir(MICSYSFSDIR);
1646		if (!dp)
1647			return 0;
1648	
1649		while ((file = readdir(dp)) != NULL) {
1650			if (!strncmp(file->d_name, "mic", 3)) {
1651				mic->next = calloc(1, sizeof(struct mic_info));
1652				if (mic->next) {
1653					mic = mic->next;
1654					mic->id = atoi(&file->d_name[3]);
1655					mic->name = malloc(strlen(file->d_name) + 16);
1656					if (mic->name)
1657						strcpy(mic->name, file->d_name);
1658					mpsslog("MIC name %s id %d\n", mic->name,
1659						mic->id);
1660					cnt++;
1661				}
1662			}
1663		}
1664	
1665		closedir(dp);
1666		return cnt;
1667	}
1668	
1669	void
1670	mpsslog(char *format, ...)
1671	{
1672		va_list args;
1673		char buffer[4096];
1674		char ts[52], *ts1;
1675		time_t t;
1676	
1677		if (logfp == NULL)
1678			return;
1679	
1680		va_start(args, format);
1681		vsprintf(buffer, format, args);
1682		va_end(args);
1683	
1684		time(&t);
1685		ts1 = ctime_r(&t, ts);
1686		ts1[strlen(ts1) - 1] = '\0';
1687		fprintf(logfp, "%s: %s", ts1, buffer);
1688	
1689		fflush(logfp);
1690	}
1691	
1692	int
1693	main(int argc, char *argv[])
1694	{
1695		int cnt;
1696		pid_t pid;
1697	
1698		myname = argv[0];
1699	
1700		logfp = fopen(LOGFILE_NAME, "a+");
1701		if (!logfp) {
1702			fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1703			exit(1);
1704		}
1705		pid = fork();
1706		switch (pid) {
1707		case 0:
1708			break;
1709		case -1:
1710			exit(2);
1711		default:
1712			exit(0);
1713		}
1714	
1715		mpsslog("MIC Daemon start\n");
1716	
1717		cnt = init_mic_list();
1718		if (cnt == 0) {
1719			mpsslog("MIC module not loaded\n");
1720			exit(3);
1721		}
1722		mpsslog("MIC found %d devices\n", cnt);
1723	
1724		start_daemon();
1725	
1726		exit(0);
1727	}
Hide Line Numbers
About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Information is copyright its respective author. All material is available from the Linux Kernel Source distributed under a GPL License. This page is provided as a free service by mjmwired.net.