About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Documentation / mic / mpssd / mpssd.c




Custom Search

Based on kernel version 4.1. Page generated on 2015-06-28 12:13 EST.

1	/*
2	 * Intel MIC Platform Software Stack (MPSS)
3	 *
4	 * Copyright(c) 2013 Intel Corporation.
5	 *
6	 * This program is free software; you can redistribute it and/or modify
7	 * it under the terms of the GNU General Public License, version 2, as
8	 * published by the Free Software Foundation.
9	 *
10	 * This program is distributed in the hope that it will be useful, but
11	 * WITHOUT ANY WARRANTY; without even the implied warranty of
12	 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	 * General Public License for more details.
14	 *
15	 * The full GNU General Public License is included in this distribution in
16	 * the file called "COPYING".
17	 *
18	 * Intel MIC User Space Tools.
19	 */
20	
21	#define _GNU_SOURCE
22	
23	#include <stdlib.h>
24	#include <fcntl.h>
25	#include <getopt.h>
26	#include <assert.h>
27	#include <unistd.h>
28	#include <stdbool.h>
29	#include <signal.h>
30	#include <poll.h>
31	#include <features.h>
32	#include <sys/types.h>
33	#include <sys/stat.h>
34	#include <sys/mman.h>
35	#include <sys/socket.h>
36	#include <linux/virtio_ring.h>
37	#include <linux/virtio_net.h>
38	#include <linux/virtio_console.h>
39	#include <linux/virtio_blk.h>
40	#include <linux/version.h>
41	#include "mpssd.h"
42	#include <linux/mic_ioctl.h>
43	#include <linux/mic_common.h>
44	#include <tools/endian.h>
45	
46	static void init_mic(struct mic_info *mic);
47	
48	static FILE *logfp;
49	static struct mic_info mic_list;
50	
/* Element count of a real array; do not use on a pointer. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/*
 * Smaller of two values after both have been converted to @type.
 * Implemented as a GNU statement expression so that each argument is
 * evaluated exactly once.
 */
#define min_t(type, a, b) ({				\
		type __lhs = (a);			\
		type __rhs = (b);			\
		__lhs < __rhs ? __lhs : __rhs; })
57	
/*
 * Align addr on a size boundary - adjust address up/down if needed.
 * @size must be a power of two.  Every macro argument is parenthesised so
 * that expressions containing low-precedence operators (?:, |, ...) are
 * aligned as a whole.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)

/*
 * Force a single access to @x through a volatile-qualified lvalue.
 * __typeof__ is used so the macro also works when the compiler is run in a
 * strict ISO mode where the plain typeof keyword is disabled.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))
69	
/*
 * Compile-time switch: when non-zero the offload feature bits are put into
 * virtnet_dev_page and the matching offloads are set on the TAP device.
 */
#define GSO_ENABLED		1

/* Sizes used to dimension the network packet buffers. */
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* GSO payload plus Ethernet header, rounded up to a multiple of 64. */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))

/* Byte offset in the device mapping at which the vrings start. */
#define MIC_DEVICE_PAGE_END	0x1000

/* Fallback for headers that do not provide this flag. */
#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
79	
80	static struct {
81		struct mic_device_desc dd;
82		struct mic_vqconfig vqconfig[2];
83		__u32 host_features, guest_acknowledgements;
84		struct virtio_console_config cons_config;
85	} virtcons_dev_page = {
86		.dd = {
87			.type = VIRTIO_ID_CONSOLE,
88			.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89			.feature_len = sizeof(virtcons_dev_page.host_features),
90			.config_len = sizeof(virtcons_dev_page.cons_config),
91		},
92		.vqconfig[0] = {
93			.num = htole16(MIC_VRING_ENTRIES),
94		},
95		.vqconfig[1] = {
96			.num = htole16(MIC_VRING_ENTRIES),
97		},
98	};
99	
100	static struct {
101		struct mic_device_desc dd;
102		struct mic_vqconfig vqconfig[2];
103		__u32 host_features, guest_acknowledgements;
104		struct virtio_net_config net_config;
105	} virtnet_dev_page = {
106		.dd = {
107			.type = VIRTIO_ID_NET,
108			.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109			.feature_len = sizeof(virtnet_dev_page.host_features),
110			.config_len = sizeof(virtnet_dev_page.net_config),
111		},
112		.vqconfig[0] = {
113			.num = htole16(MIC_VRING_ENTRIES),
114		},
115		.vqconfig[1] = {
116			.num = htole16(MIC_VRING_ENTRIES),
117		},
118	#if GSO_ENABLED
119			.host_features = htole32(
120			1 << VIRTIO_NET_F_CSUM |
121			1 << VIRTIO_NET_F_GSO |
122			1 << VIRTIO_NET_F_GUEST_TSO4 |
123			1 << VIRTIO_NET_F_GUEST_TSO6 |
124			1 << VIRTIO_NET_F_GUEST_ECN |
125			1 << VIRTIO_NET_F_GUEST_UFO),
126	#else
127			.host_features = 0,
128	#endif
129	};
130	
131	static const char *mic_config_dir = "/etc/sysconfig/mic";
132	static const char *virtblk_backend = "VIRTBLK_BACKEND";
133	static struct {
134		struct mic_device_desc dd;
135		struct mic_vqconfig vqconfig[1];
136		__u32 host_features, guest_acknowledgements;
137		struct virtio_blk_config blk_config;
138	} virtblk_dev_page = {
139		.dd = {
140			.type = VIRTIO_ID_BLOCK,
141			.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
142			.feature_len = sizeof(virtblk_dev_page.host_features),
143			.config_len = sizeof(virtblk_dev_page.blk_config),
144		},
145		.vqconfig[0] = {
146			.num = htole16(MIC_VRING_ENTRIES),
147		},
148		.host_features =
149			htole32(1<<VIRTIO_BLK_F_SEG_MAX),
150		.blk_config = {
151			.seg_max = htole32(MIC_VRING_ENTRIES - 2),
152			.capacity = htole64(0),
153		 }
154	};
155	
/* Not referenced in this excerpt; presumably the program name - confirm. */
static char *myname;
157	
158	static int
159	tap_configure(struct mic_info *mic, char *dev)
160	{
161		pid_t pid;
162		char *ifargv[7];
163		char ipaddr[IFNAMSIZ];
164		int ret = 0;
165	
166		pid = fork();
167		if (pid == 0) {
168			ifargv[0] = "ip";
169			ifargv[1] = "link";
170			ifargv[2] = "set";
171			ifargv[3] = dev;
172			ifargv[4] = "up";
173			ifargv[5] = NULL;
174			mpsslog("Configuring %s\n", dev);
175			ret = execvp("ip", ifargv);
176			if (ret < 0) {
177				mpsslog("%s execvp failed errno %s\n",
178					mic->name, strerror(errno));
179				return ret;
180			}
181		}
182		if (pid < 0) {
183			mpsslog("%s fork failed errno %s\n",
184				mic->name, strerror(errno));
185			return ret;
186		}
187	
188		ret = waitpid(pid, NULL, 0);
189		if (ret < 0) {
190			mpsslog("%s waitpid failed errno %s\n",
191				mic->name, strerror(errno));
192			return ret;
193		}
194	
195		snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
196	
197		pid = fork();
198		if (pid == 0) {
199			ifargv[0] = "ip";
200			ifargv[1] = "addr";
201			ifargv[2] = "add";
202			ifargv[3] = ipaddr;
203			ifargv[4] = "dev";
204			ifargv[5] = dev;
205			ifargv[6] = NULL;
206			mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
207			ret = execvp("ip", ifargv);
208			if (ret < 0) {
209				mpsslog("%s execvp failed errno %s\n",
210					mic->name, strerror(errno));
211				return ret;
212			}
213		}
214		if (pid < 0) {
215			mpsslog("%s fork failed errno %s\n",
216				mic->name, strerror(errno));
217			return ret;
218		}
219	
220		ret = waitpid(pid, NULL, 0);
221		if (ret < 0) {
222			mpsslog("%s waitpid failed errno %s\n",
223				mic->name, strerror(errno));
224			return ret;
225		}
226		mpsslog("MIC name %s %s %d DONE!\n",
227			mic->name, __func__, __LINE__);
228		return 0;
229	}
230	
231	static int tun_alloc(struct mic_info *mic, char *dev)
232	{
233		struct ifreq ifr;
234		int fd, err;
235	#if GSO_ENABLED
236		unsigned offload;
237	#endif
238		fd = open("/dev/net/tun", O_RDWR);
239		if (fd < 0) {
240			mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
241			goto done;
242		}
243	
244		memset(&ifr, 0, sizeof(ifr));
245	
246		ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
247		if (*dev)
248			strncpy(ifr.ifr_name, dev, IFNAMSIZ);
249	
250		err = ioctl(fd, TUNSETIFF, (void *)&ifr);
251		if (err < 0) {
252			mpsslog("%s %s %d TUNSETIFF failed %s\n",
253				mic->name, __func__, __LINE__, strerror(errno));
254			close(fd);
255			return err;
256		}
257	#if GSO_ENABLED
258		offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
259			TUN_F_TSO_ECN | TUN_F_UFO;
260	
261		err = ioctl(fd, TUNSETOFFLOAD, offload);
262		if (err < 0) {
263			mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
264				mic->name, __func__, __LINE__, strerror(errno));
265			close(fd);
266			return err;
267		}
268	#endif
269		strcpy(dev, ifr.ifr_name);
270		mpsslog("Created TAP %s\n", dev);
271	done:
272		return fd;
273	}
274	
/* Slots of the pollfd array used by the virtio_net thread. */
#define NET_FD_VIRTIO_NET 0
#define NET_FD_TUN (NET_FD_VIRTIO_NET + 1)
#define MAX_NET_FD (NET_FD_TUN + 1)  /* must be the last one + 1 */
278	
279	static void set_dp(struct mic_info *mic, int type, void *dp)
280	{
281		switch (type) {
282		case VIRTIO_ID_CONSOLE:
283			mic->mic_console.console_dp = dp;
284			return;
285		case VIRTIO_ID_NET:
286			mic->mic_net.net_dp = dp;
287			return;
288		case VIRTIO_ID_BLOCK:
289			mic->mic_virtblk.block_dp = dp;
290			return;
291		}
292		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
293		assert(0);
294	}
295	
296	static void *get_dp(struct mic_info *mic, int type)
297	{
298		switch (type) {
299		case VIRTIO_ID_CONSOLE:
300			return mic->mic_console.console_dp;
301		case VIRTIO_ID_NET:
302			return mic->mic_net.net_dp;
303		case VIRTIO_ID_BLOCK:
304			return mic->mic_virtblk.block_dp;
305		}
306		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
307		assert(0);
308		return NULL;
309	}
310	
311	static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
312	{
313		struct mic_device_desc *d;
314		int i;
315		void *dp = get_dp(mic, type);
316	
317		for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
318			i += mic_total_desc_size(d)) {
319			d = dp + i;
320	
321			/* End of list */
322			if (d->type == 0)
323				break;
324	
325			if (d->type == -1)
326				continue;
327	
328			mpsslog("%s %s d-> type %d d %p\n",
329				mic->name, __func__, d->type, d);
330	
331			if (d->type == (__u8)type)
332				return d;
333		}
334		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
335		assert(0);
336		return NULL;
337	}
338	
339	/* See comments in vhost.c for explanation of next_desc() */
340	static unsigned next_desc(struct vring_desc *desc)
341	{
342		unsigned int next;
343	
344		if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
345			return -1U;
346		next = le16toh(desc->next);
347		return next;
348	}
349	
350	/* Sum up all the IOVEC length */
351	static ssize_t
352	sum_iovec_len(struct mic_copy_desc *copy)
353	{
354		ssize_t sum = 0;
355		int i;
356	
357		for (i = 0; i < copy->iovcnt; i++)
358			sum += copy->iov[i].iov_len;
359		return sum;
360	}
361	
362	static inline void verify_out_len(struct mic_info *mic,
363		struct mic_copy_desc *copy)
364	{
365		if (copy->out_len != sum_iovec_len(copy)) {
366			mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
367				mic->name, __func__, __LINE__,
368				copy->out_len, sum_iovec_len(copy));
369			assert(copy->out_len == sum_iovec_len(copy));
370		}
371	}
372	
373	/* Display an iovec */
374	static void
375	disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
376		   const char *s, int line)
377	{
378		int i;
379	
380		for (i = 0; i < copy->iovcnt; i++)
381			mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
382				mic->name, s, line, i,
383				copy->iov[i].iov_base, copy->iov[i].iov_len);
384	}
385	
386	static inline __u16 read_avail_idx(struct mic_vring *vr)
387	{
388		return ACCESS_ONCE(vr->info->avail_idx);
389	}
390	
391	static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
392					struct mic_copy_desc *copy, ssize_t len)
393	{
394		copy->vr_idx = tx ? 0 : 1;
395		copy->update_used = true;
396		if (type == VIRTIO_ID_NET)
397			copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
398		else
399			copy->iov[0].iov_len = len;
400	}
401	
402	/* Central API which triggers the copies */
403	static int
404	mic_virtio_copy(struct mic_info *mic, int fd,
405			struct mic_vring *vr, struct mic_copy_desc *copy)
406	{
407		int ret;
408	
409		ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
410		if (ret) {
411			mpsslog("%s %s %d errno %s ret %d\n",
412				mic->name, __func__, __LINE__,
413				strerror(errno), ret);
414		}
415		return ret;
416	}
417	
418	/*
419	 * This initialization routine requires at least one
420	 * vring i.e. vr0. vr1 is optional.
421	 */
422	static void *
423	init_vr(struct mic_info *mic, int fd, int type,
424		struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
425	{
426		int vr_size;
427		char *va;
428	
429		vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
430			MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
431		va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
432			PROT_READ, MAP_SHARED, fd, 0);
433		if (MAP_FAILED == va) {
434			mpsslog("%s %s %d mmap failed errno %s\n",
435				mic->name, __func__, __LINE__,
436				strerror(errno));
437			goto done;
438		}
439		set_dp(mic, type, va);
440		vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
441		vr0->info = vr0->va +
442			vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
443		vring_init(&vr0->vr,
444			   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
445		mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
446			__func__, mic->name, vr0->va, vr0->info, vr_size,
447			vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
448		mpsslog("magic 0x%x expected 0x%x\n",
449			le32toh(vr0->info->magic), MIC_MAGIC + type);
450		assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
451		if (vr1) {
452			vr1->va = (struct mic_vring *)
453				&va[MIC_DEVICE_PAGE_END + vr_size];
454			vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
455				MIC_VIRTIO_RING_ALIGN);
456			vring_init(&vr1->vr,
457				   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
458			mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
459				__func__, mic->name, vr1->va, vr1->info, vr_size,
460				vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
461			mpsslog("magic 0x%x expected 0x%x\n",
462				le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
463			assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
464		}
465	done:
466		return va;
467	}
468	
469	static void
470	wait_for_card_driver(struct mic_info *mic, int fd, int type)
471	{
472		struct pollfd pollfd;
473		int err;
474		struct mic_device_desc *desc = get_device_desc(mic, type);
475	
476		pollfd.fd = fd;
477		mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
478			mic->name, __func__, type, desc->status);
479		while (1) {
480			pollfd.events = POLLIN;
481			pollfd.revents = 0;
482			err = poll(&pollfd, 1, -1);
483			if (err < 0) {
484				mpsslog("%s %s poll failed %s\n",
485					mic->name, __func__, strerror(errno));
486				continue;
487			}
488	
489			if (pollfd.revents) {
490				mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
491					mic->name, __func__, type, desc->status);
492				if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
493					mpsslog("%s %s poll.revents %d\n",
494						mic->name, __func__, pollfd.revents);
495					mpsslog("%s %s desc-> type %d status 0x%x\n",
496						mic->name, __func__, type,
497						desc->status);
498					break;
499				}
500			}
501		}
502	}
503	
504	/* Spin till we have some descriptors */
505	static void
506	spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
507	{
508		__u16 avail_idx = read_avail_idx(vr);
509	
510		while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
511	#ifdef DEBUG
512			mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
513				mic->name, __func__,
514				le16toh(vr->vr.avail->idx), vr->info->avail_idx);
515	#endif
516			sched_yield();
517		}
518	}
519	
520	static void *
521	virtio_net(void *arg)
522	{
523		static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
524		static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
525		struct iovec vnet_iov[2][2] = {
526			{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
527			  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
528			{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
529			  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
530		};
531		struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
532		struct mic_info *mic = (struct mic_info *)arg;
533		char if_name[IFNAMSIZ];
534		struct pollfd net_poll[MAX_NET_FD];
535		struct mic_vring tx_vr, rx_vr;
536		struct mic_copy_desc copy;
537		struct mic_device_desc *desc;
538		int err;
539	
540		snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
541		mic->mic_net.tap_fd = tun_alloc(mic, if_name);
542		if (mic->mic_net.tap_fd < 0)
543			goto done;
544	
545		if (tap_configure(mic, if_name))
546			goto done;
547		mpsslog("MIC name %s id %d\n", mic->name, mic->id);
548	
549		net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
550		net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
551		net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
552		net_poll[NET_FD_TUN].events = POLLIN;
553	
554		if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
555					  VIRTIO_ID_NET, &tx_vr, &rx_vr,
556			virtnet_dev_page.dd.num_vq)) {
557			mpsslog("%s init_vr failed %s\n",
558				mic->name, strerror(errno));
559			goto done;
560		}
561	
562		copy.iovcnt = 2;
563		desc = get_device_desc(mic, VIRTIO_ID_NET);
564	
565		while (1) {
566			ssize_t len;
567	
568			net_poll[NET_FD_VIRTIO_NET].revents = 0;
569			net_poll[NET_FD_TUN].revents = 0;
570	
571			/* Start polling for data from tap and virtio net */
572			err = poll(net_poll, 2, -1);
573			if (err < 0) {
574				mpsslog("%s poll failed %s\n",
575					__func__, strerror(errno));
576				continue;
577			}
578			if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
579				wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
580						     VIRTIO_ID_NET);
581			/*
582			 * Check if there is data to be read from TUN and write to
583			 * virtio net fd if there is.
584			 */
585			if (net_poll[NET_FD_TUN].revents & POLLIN) {
586				copy.iov = iov0;
587				len = readv(net_poll[NET_FD_TUN].fd,
588					copy.iov, copy.iovcnt);
589				if (len > 0) {
590					struct virtio_net_hdr *hdr
591						= (struct virtio_net_hdr *)vnet_hdr[0];
592	
593					/* Disable checksums on the card since we are on
594					   a reliable PCIe link */
595					hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
596	#ifdef DEBUG
597					mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
598						__func__, __LINE__, hdr->flags);
599					mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
600						copy.out_len, hdr->gso_type);
601	#endif
602	#ifdef DEBUG
603					disp_iovec(mic, copy, __func__, __LINE__);
604					mpsslog("%s %s %d read from tap 0x%lx\n",
605						mic->name, __func__, __LINE__,
606						len);
607	#endif
608					spin_for_descriptors(mic, &tx_vr);
609					txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
610						     len);
611	
612					err = mic_virtio_copy(mic,
613						mic->mic_net.virtio_net_fd, &tx_vr,
614						&copy);
615					if (err < 0) {
616						mpsslog("%s %s %d mic_virtio_copy %s\n",
617							mic->name, __func__, __LINE__,
618							strerror(errno));
619					}
620					if (!err)
621						verify_out_len(mic, &copy);
622	#ifdef DEBUG
623					disp_iovec(mic, copy, __func__, __LINE__);
624					mpsslog("%s %s %d wrote to net 0x%lx\n",
625						mic->name, __func__, __LINE__,
626						sum_iovec_len(&copy));
627	#endif
628					/* Reinitialize IOV for next run */
629					iov0[1].iov_len = MAX_NET_PKT_SIZE;
630				} else if (len < 0) {
631					disp_iovec(mic, &copy, __func__, __LINE__);
632					mpsslog("%s %s %d read failed %s ", mic->name,
633						__func__, __LINE__, strerror(errno));
634					mpsslog("cnt %d sum %zd\n",
635						copy.iovcnt, sum_iovec_len(&copy));
636				}
637			}
638	
639			/*
640			 * Check if there is data to be read from virtio net and
641			 * write to TUN if there is.
642			 */
643			if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
644				while (rx_vr.info->avail_idx !=
645					le16toh(rx_vr.vr.avail->idx)) {
646					copy.iov = iov1;
647					txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
648						     MAX_NET_PKT_SIZE
649						+ sizeof(struct virtio_net_hdr));
650	
651					err = mic_virtio_copy(mic,
652						mic->mic_net.virtio_net_fd, &rx_vr,
653						&copy);
654					if (!err) {
655	#ifdef DEBUG
656						struct virtio_net_hdr *hdr
657							= (struct virtio_net_hdr *)
658								vnet_hdr[1];
659	
660						mpsslog("%s %s %d hdr->flags 0x%x, ",
661							mic->name, __func__, __LINE__,
662							hdr->flags);
663						mpsslog("out_len %d gso_type 0x%x\n",
664							copy.out_len,
665							hdr->gso_type);
666	#endif
667						/* Set the correct output iov_len */
668						iov1[1].iov_len = copy.out_len -
669							sizeof(struct virtio_net_hdr);
670						verify_out_len(mic, &copy);
671	#ifdef DEBUG
672						disp_iovec(mic, copy, __func__,
673							   __LINE__);
674						mpsslog("%s %s %d ",
675							mic->name, __func__, __LINE__);
676						mpsslog("read from net 0x%lx\n",
677							sum_iovec_len(copy));
678	#endif
679						len = writev(net_poll[NET_FD_TUN].fd,
680							copy.iov, copy.iovcnt);
681						if (len != sum_iovec_len(&copy)) {
682							mpsslog("Tun write failed %s ",
683								strerror(errno));
684							mpsslog("len 0x%zx ", len);
685							mpsslog("read_len 0x%zx\n",
686								sum_iovec_len(&copy));
687						} else {
688	#ifdef DEBUG
689							disp_iovec(mic, &copy, __func__,
690								   __LINE__);
691							mpsslog("%s %s %d ",
692								mic->name, __func__,
693								__LINE__);
694							mpsslog("wrote to tap 0x%lx\n",
695								len);
696	#endif
697						}
698					} else {
699						mpsslog("%s %s %d mic_virtio_copy %s\n",
700							mic->name, __func__, __LINE__,
701							strerror(errno));
702						break;
703					}
704				}
705			}
706			if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
707				mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
708		}
709	done:
710		pthread_exit(NULL);
711	}
712	
/*
 * virtio_console: slots of the pollfd array used by the console thread and
 * the size of its transfer buffers.
 */
#define VIRTIO_CONSOLE_FD 0
#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
/* must be the last one + 1 */
#define MAX_CONSOLE_FD (MONITOR_FD + 1)
#define MAX_BUFFER_SIZE PAGE_SIZE
718	
719	static void *
720	virtio_console(void *arg)
721	{
722		static __u8 vcons_buf[2][PAGE_SIZE];
723		struct iovec vcons_iov[2] = {
724			{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
725			{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
726		};
727		struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
728		struct mic_info *mic = (struct mic_info *)arg;
729		int err;
730		struct pollfd console_poll[MAX_CONSOLE_FD];
731		int pty_fd;
732		char *pts_name;
733		ssize_t len;
734		struct mic_vring tx_vr, rx_vr;
735		struct mic_copy_desc copy;
736		struct mic_device_desc *desc;
737	
738		pty_fd = posix_openpt(O_RDWR);
739		if (pty_fd < 0) {
740			mpsslog("can't open a pseudoterminal master device: %s\n",
741				strerror(errno));
742			goto _return;
743		}
744		pts_name = ptsname(pty_fd);
745		if (pts_name == NULL) {
746			mpsslog("can't get pts name\n");
747			goto _close_pty;
748		}
749		printf("%s console message goes to %s\n", mic->name, pts_name);
750		mpsslog("%s console message goes to %s\n", mic->name, pts_name);
751		err = grantpt(pty_fd);
752		if (err < 0) {
753			mpsslog("can't grant access: %s %s\n",
754				pts_name, strerror(errno));
755			goto _close_pty;
756		}
757		err = unlockpt(pty_fd);
758		if (err < 0) {
759			mpsslog("can't unlock a pseudoterminal: %s %s\n",
760				pts_name, strerror(errno));
761			goto _close_pty;
762		}
763		console_poll[MONITOR_FD].fd = pty_fd;
764		console_poll[MONITOR_FD].events = POLLIN;
765	
766		console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
767		console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
768	
769		if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
770					  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
771			virtcons_dev_page.dd.num_vq)) {
772			mpsslog("%s init_vr failed %s\n",
773				mic->name, strerror(errno));
774			goto _close_pty;
775		}
776	
777		copy.iovcnt = 1;
778		desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
779	
780		for (;;) {
781			console_poll[MONITOR_FD].revents = 0;
782			console_poll[VIRTIO_CONSOLE_FD].revents = 0;
783			err = poll(console_poll, MAX_CONSOLE_FD, -1);
784			if (err < 0) {
785				mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
786					strerror(errno));
787				continue;
788			}
789			if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
790				wait_for_card_driver(mic,
791						     mic->mic_console.virtio_console_fd,
792					VIRTIO_ID_CONSOLE);
793	
794			if (console_poll[MONITOR_FD].revents & POLLIN) {
795				copy.iov = iov0;
796				len = readv(pty_fd, copy.iov, copy.iovcnt);
797				if (len > 0) {
798	#ifdef DEBUG
799					disp_iovec(mic, copy, __func__, __LINE__);
800					mpsslog("%s %s %d read from tap 0x%lx\n",
801						mic->name, __func__, __LINE__,
802						len);
803	#endif
804					spin_for_descriptors(mic, &tx_vr);
805					txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
806						     &copy, len);
807	
808					err = mic_virtio_copy(mic,
809						mic->mic_console.virtio_console_fd,
810						&tx_vr, &copy);
811					if (err < 0) {
812						mpsslog("%s %s %d mic_virtio_copy %s\n",
813							mic->name, __func__, __LINE__,
814							strerror(errno));
815					}
816					if (!err)
817						verify_out_len(mic, &copy);
818	#ifdef DEBUG
819					disp_iovec(mic, copy, __func__, __LINE__);
820					mpsslog("%s %s %d wrote to net 0x%lx\n",
821						mic->name, __func__, __LINE__,
822						sum_iovec_len(copy));
823	#endif
824					/* Reinitialize IOV for next run */
825					iov0->iov_len = PAGE_SIZE;
826				} else if (len < 0) {
827					disp_iovec(mic, &copy, __func__, __LINE__);
828					mpsslog("%s %s %d read failed %s ",
829						mic->name, __func__, __LINE__,
830						strerror(errno));
831					mpsslog("cnt %d sum %zd\n",
832						copy.iovcnt, sum_iovec_len(&copy));
833				}
834			}
835	
836			if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
837				while (rx_vr.info->avail_idx !=
838					le16toh(rx_vr.vr.avail->idx)) {
839					copy.iov = iov1;
840					txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
841						     &copy, PAGE_SIZE);
842	
843					err = mic_virtio_copy(mic,
844						mic->mic_console.virtio_console_fd,
845						&rx_vr, &copy);
846					if (!err) {
847						/* Set the correct output iov_len */
848						iov1->iov_len = copy.out_len;
849						verify_out_len(mic, &copy);
850	#ifdef DEBUG
851						disp_iovec(mic, copy, __func__,
852							   __LINE__);
853						mpsslog("%s %s %d ",
854							mic->name, __func__, __LINE__);
855						mpsslog("read from net 0x%lx\n",
856							sum_iovec_len(copy));
857	#endif
858						len = writev(pty_fd,
859							copy.iov, copy.iovcnt);
860						if (len != sum_iovec_len(&copy)) {
861							mpsslog("Tun write failed %s ",
862								strerror(errno));
863							mpsslog("len 0x%zx ", len);
864							mpsslog("read_len 0x%zx\n",
865								sum_iovec_len(&copy));
866						} else {
867	#ifdef DEBUG
868							disp_iovec(mic, copy, __func__,
869								   __LINE__);
870							mpsslog("%s %s %d ",
871								mic->name, __func__,
872								__LINE__);
873							mpsslog("wrote to tap 0x%lx\n",
874								len);
875	#endif
876						}
877					} else {
878						mpsslog("%s %s %d mic_virtio_copy %s\n",
879							mic->name, __func__, __LINE__,
880							strerror(errno));
881						break;
882					}
883				}
884			}
885			if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
886				mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
887		}
888	_close_pty:
889		close(pty_fd);
890	_return:
891		pthread_exit(NULL);
892	}
893	
894	static void
895	add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
896	{
897		char path[PATH_MAX];
898		int fd, err;
899	
900		snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
901		fd = open(path, O_RDWR);
902		if (fd < 0) {
903			mpsslog("Could not open %s %s\n", path, strerror(errno));
904			return;
905		}
906	
907		err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
908		if (err < 0) {
909			mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
910			close(fd);
911			return;
912		}
913		switch (dd->type) {
914		case VIRTIO_ID_NET:
915			mic->mic_net.virtio_net_fd = fd;
916			mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
917			break;
918		case VIRTIO_ID_CONSOLE:
919			mic->mic_console.virtio_console_fd = fd;
920			mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
921			break;
922		case VIRTIO_ID_BLOCK:
923			mic->mic_virtblk.virtio_block_fd = fd;
924			mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
925			break;
926		}
927	}
928	
929	static bool
930	set_backend_file(struct mic_info *mic)
931	{
932		FILE *config;
933		char buff[PATH_MAX], *line, *evv, *p;
934	
935		snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
936		config = fopen(buff, "r");
937		if (config == NULL)
938			return false;
939		do {  /* look for "virtblk_backend=XXXX" */
940			line = fgets(buff, PATH_MAX, config);
941			if (line == NULL)
942				break;
943			if (*line == '#')
944				continue;
945			p = strchr(line, '\n');
946			if (p)
947				*p = '\0';
948		} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
949		fclose(config);
950		if (line == NULL)
951			return false;
952		evv = strchr(line, '=');
953		if (evv == NULL)
954			return false;
955		mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
956		if (mic->mic_virtblk.backend_file == NULL) {
957			mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
958			return false;
959		}
960		strcpy(mic->mic_virtblk.backend_file, evv + 1);
961		return true;
962	}
963	
964	#define SECTOR_SIZE 512
965	static bool
966	set_backend_size(struct mic_info *mic)
967	{
968		mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
969			SEEK_END);
970		if (mic->mic_virtblk.backend_size < 0) {
971			mpsslog("%s: can't seek: %s\n",
972				mic->name, mic->mic_virtblk.backend_file);
973			return false;
974		}
975		virtblk_dev_page.blk_config.capacity =
976			mic->mic_virtblk.backend_size / SECTOR_SIZE;
977		if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
978			virtblk_dev_page.blk_config.capacity++;
979	
980		virtblk_dev_page.blk_config.capacity =
981			htole64(virtblk_dev_page.blk_config.capacity);
982	
983		return true;
984	}
985	
986	static bool
987	open_backend(struct mic_info *mic)
988	{
989		if (!set_backend_file(mic))
990			goto _error_exit;
991		mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
992		if (mic->mic_virtblk.backend < 0) {
993			mpsslog("%s: can't open: %s\n", mic->name,
994				mic->mic_virtblk.backend_file);
995			goto _error_free;
996		}
997		if (!set_backend_size(mic))
998			goto _error_close;
999		mic->mic_virtblk.backend_addr = mmap(NULL,
1000			mic->mic_virtblk.backend_size,
1001			PROT_READ|PROT_WRITE, MAP_SHARED,
1002			mic->mic_virtblk.backend, 0L);
1003		if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1004			mpsslog("%s: can't map: %s %s\n",
1005				mic->name, mic->mic_virtblk.backend_file,
1006				strerror(errno));
1007			goto _error_close;
1008		}
1009		return true;
1010	
1011	 _error_close:
1012		close(mic->mic_virtblk.backend);
1013	 _error_free:
1014		free(mic->mic_virtblk.backend_file);
1015	 _error_exit:
1016		return false;
1017	}
1018	
1019	static void
1020	close_backend(struct mic_info *mic)
1021	{
1022		munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1023		close(mic->mic_virtblk.backend);
1024		free(mic->mic_virtblk.backend_file);
1025	}
1026	
1027	static bool
1028	start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1029	{
1030		if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1031			mpsslog("%s: blk_config is not 8 byte aligned.\n",
1032				mic->name);
1033			return false;
1034		}
1035		add_virtio_device(mic, &virtblk_dev_page.dd);
1036		if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1037					  VIRTIO_ID_BLOCK, vring, NULL,
1038					  virtblk_dev_page.dd.num_vq)) {
1039			mpsslog("%s init_vr failed %s\n",
1040				mic->name, strerror(errno));
1041			return false;
1042		}
1043		return true;
1044	}
1045	
1046	static void
1047	stop_virtblk(struct mic_info *mic)
1048	{
1049		int vr_size, ret;
1050	
1051		vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1052			MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1053		ret = munmap(mic->mic_virtblk.block_dp,
1054			MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1055		if (ret < 0)
1056			mpsslog("%s munmap errno %d\n", mic->name, errno);
1057		close(mic->mic_virtblk.virtio_block_fd);
1058	}
1059	
1060	static __u8
1061	header_error_check(struct vring_desc *desc)
1062	{
1063		if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1064			mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1065				__func__, __LINE__);
1066			return -EIO;
1067		}
1068		if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1069			mpsslog("%s() %d: alone\n",
1070				__func__, __LINE__);
1071			return -EIO;
1072		}
1073		if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1074			mpsslog("%s() %d: not read\n",
1075				__func__, __LINE__);
1076			return -EIO;
1077		}
1078		return 0;
1079	}
1080	
1081	static int
1082	read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1083	{
1084		struct iovec iovec;
1085		struct mic_copy_desc copy;
1086	
1087		iovec.iov_len = sizeof(*hdr);
1088		iovec.iov_base = hdr;
1089		copy.iov = &iovec;
1090		copy.iovcnt = 1;
1091		copy.vr_idx = 0;  /* only one vring on virtio_block */
1092		copy.update_used = false;  /* do not update used index */
1093		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1094	}
1095	
1096	static int
1097	transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1098	{
1099		struct mic_copy_desc copy;
1100	
1101		copy.iov = iovec;
1102		copy.iovcnt = iovcnt;
1103		copy.vr_idx = 0;  /* only one vring on virtio_block */
1104		copy.update_used = false;  /* do not update used index */
1105		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1106	}
1107	
1108	static __u8
1109	status_error_check(struct vring_desc *desc)
1110	{
1111		if (le32toh(desc->len) != sizeof(__u8)) {
1112			mpsslog("%s() %d: length is not sizeof(status)\n",
1113				__func__, __LINE__);
1114			return -EIO;
1115		}
1116		return 0;
1117	}
1118	
1119	static int
1120	write_status(int fd, __u8 *status)
1121	{
1122		struct iovec iovec;
1123		struct mic_copy_desc copy;
1124	
1125		iovec.iov_base = status;
1126		iovec.iov_len = sizeof(*status);
1127		copy.iov = &iovec;
1128		copy.iovcnt = 1;
1129		copy.vr_idx = 0;  /* only one vring on virtio_block */
1130		copy.update_used = true; /* Update used index */
1131		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1132	}
1133	
/*
 * virtio_block - thread body that services the virtio block device of one
 * MIC card (@arg is the card's struct mic_info).
 *
 * Outer loop (never left):
 *   1. open_backend(); if that fails, wait (polling once a second) until
 *      mic_virtblk.signaled is set, then try again.
 *   2. start_virtblk() and allocate an iovec array sized by
 *      blk_config.seg_max.
 *   3. Serve requests until mic_virtblk.signaled is set, then free the
 *      iovec array, stop the device, close the backend and start over.
 *
 * Because the outer for (;;) has no exit, the trailing pthread_exit() is
 * not reached.
 */
static void *
virtio_block(void *arg)
{
	struct mic_info *mic = (struct mic_info *)arg;
	int ret;
	struct pollfd block_poll;
	struct mic_vring vring;
	__u16 avail_idx;
	__u32 desc_idx;
	struct vring_desc *desc;
	struct iovec *iovec, *piov;
	__u8 status;
	__u32 buffer_desc_idx;
	struct virtio_blk_outhdr hdr;
	void *fos;	/* current position inside the mapped backend file */

	for (;;) {  /* forever */
		if (!open_backend(mic)) { /* No virtblk */
			/* Idle until the signaled flag is raised. */
			for (mic->mic_virtblk.signaled = 0;
				!mic->mic_virtblk.signaled;)
				sleep(1);
			continue;
		}

		/* backend file is specified. */
		if (!start_virtblk(mic, &vring))
			goto _close_backend;
		iovec = malloc(sizeof(*iovec) *
			le32toh(virtblk_dev_page.blk_config.seg_max));
		if (!iovec) {
			mpsslog("%s: can't alloc iovec: %s\n",
				mic->name, strerror(ENOMEM));
			goto _stop_virtblk;
		}

		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
		block_poll.events = POLLIN;
		/* Request loop; ends once the signaled flag is raised. */
		for (mic->mic_virtblk.signaled = 0;
		     !mic->mic_virtblk.signaled;) {
			block_poll.revents = 0;
					/* timeout in 1 sec to see signaled */
			ret = poll(&block_poll, 1, 1000);
			if (ret < 0) {
				mpsslog("%s %d: poll failed: %s\n",
					__func__, __LINE__,
					strerror(errno));
				continue;
			}

			if (!(block_poll.revents & POLLIN)) {
#ifdef DEBUG
				mpsslog("%s %d: block_poll.revents=0x%x\n",
					__func__, __LINE__, block_poll.revents);
#endif
				continue;
			}

			/* POLLIN: drain every request currently available. */
			while (vring.info->avail_idx !=
				le16toh(vring.vr.avail->idx)) {
				/* read header element */
				avail_idx =
					vring.info->avail_idx &
					(vring.vr.num - 1);
				desc_idx = le16toh(
					vring.vr.avail->ring[avail_idx]);
				desc = &vring.vr.desc[desc_idx];
#ifdef DEBUG
				mpsslog("%s() %d: avail_idx=%d ",
					__func__, __LINE__,
					vring.info->avail_idx);
				mpsslog("vring.vr.num=%d desc=%p\n",
					vring.vr.num, desc);
#endif
				/*
				 * NOTE(review): the result stored in status
				 * here is overwritten by "status = 0" below
				 * before it is ever read, so a failed header
				 * check only produces a log line - confirm
				 * whether that is intended.
				 */
				status = header_error_check(desc);
				ret = read_header(
					mic->mic_virtblk.virtio_block_fd,
					&hdr, desc_idx);
				if (ret < 0) {
					mpsslog("%s() %d %s: ret=%d %s\n",
						__func__, __LINE__,
						mic->name, ret,
						strerror(errno));
					break;
				}
				/*
				 * buffer element(s): build one iovec entry per
				 * chained data descriptor, each pointing into
				 * the mapped backend starting at the requested
				 * sector. The walk stops at the first
				 * descriptor without VRING_DESC_F_NEXT, which
				 * is then used as the status descriptor.
				 *
				 * NOTE(review): desc->flags and desc->len are
				 * used here without le16toh()/le32toh(), unlike
				 * the other descriptor accesses in this file.
				 */
				piov = iovec;
				status = 0;
				fos = mic->mic_virtblk.backend_addr +
					(hdr.sector * SECTOR_SIZE);
				buffer_desc_idx = next_desc(desc);
				desc_idx = buffer_desc_idx;
				for (desc = &vring.vr.desc[buffer_desc_idx];
				     desc->flags & VRING_DESC_F_NEXT;
				     desc_idx = next_desc(desc),
					     desc = &vring.vr.desc[desc_idx]) {
					piov->iov_len = desc->len;
					piov->iov_base = fos;
					piov++;
					fos += desc->len;
				}
				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
					VIRTIO_BLK_T_GET_ID)) {
					/*
					  VIRTIO_BLK_T_IN - does not do
					  anything. Probably for documenting.
					  VIRTIO_BLK_T_SCSI_CMD - for
					  virtio_scsi.
					  VIRTIO_BLK_T_FLUSH - turned off in
					  config space.
					  VIRTIO_BLK_T_BARRIER - defined but not
					  used in anywhere.
					*/
					mpsslog("%s() %d: type %x ",
						__func__, __LINE__,
						hdr.type);
					mpsslog("is not supported\n");
					status = -ENOTSUP;

				} else {
					ret = transfer_blocks(
					mic->mic_virtblk.virtio_block_fd,
						iovec,
						piov - iovec);
					/*
					 * NOTE(review): status is still 0 on
					 * this branch, so "status != 0" is
					 * never true and a transfer failure
					 * is not recorded in status - looks
					 * like "status == 0" may have been
					 * meant; confirm.
					 */
					if (ret < 0 &&
					    status != 0)
						status = ret;
				}
				/*
				 * write status and update used pointer
				 *
				 * NOTE(review): a non-zero status is replaced
				 * by the status_error_check() result, which is
				 * 0 when the status descriptor is one byte
				 * long; the return value of write_status() is
				 * not examined. Confirm both are intended.
				 */
				if (status != 0)
					status = status_error_check(desc);
				ret = write_status(
					mic->mic_virtblk.virtio_block_fd,
					&status);
#ifdef DEBUG
				mpsslog("%s() %d: write status=%d on desc=%p\n",
					__func__, __LINE__,
					status, desc);
#endif
			}
		}
		free(iovec);
_stop_virtblk:
		stop_virtblk(mic);
_close_backend:
		close_backend(mic);
	}  /* forever */

	pthread_exit(NULL);
}
1285	
1286	static void
1287	reset(struct mic_info *mic)
1288	{
1289	#define RESET_TIMEOUT 120
1290		int i = RESET_TIMEOUT;
1291		setsysfs(mic->name, "state", "reset");
1292		while (i) {
1293			char *state;
1294			state = readsysfs(mic->name, "state");
1295			if (!state)
1296				goto retry;
1297			mpsslog("%s: %s %d state %s\n",
1298				mic->name, __func__, __LINE__, state);
1299	
1300			/*
1301			 * If the shutdown was initiated by OSPM, the state stays
1302			 * in "suspended" which is also a valid condition for reset.
1303			 */
1304			if ((!strcmp(state, "offline")) ||
1305			    (!strcmp(state, "suspended"))) {
1306				free(state);
1307				break;
1308			}
1309			free(state);
1310	retry:
1311			sleep(1);
1312			i--;
1313		}
1314	}
1315	
1316	static int
1317	get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1318	{
1319		if (!strcmp(shutdown_status, "nop"))
1320			return MIC_NOP;
1321		if (!strcmp(shutdown_status, "crashed"))
1322			return MIC_CRASHED;
1323		if (!strcmp(shutdown_status, "halted"))
1324			return MIC_HALTED;
1325		if (!strcmp(shutdown_status, "poweroff"))
1326			return MIC_POWER_OFF;
1327		if (!strcmp(shutdown_status, "restart"))
1328			return MIC_RESTART;
1329		mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1330		/* Invalid state */
1331		assert(0);
1332	};
1333	
1334	static int get_mic_state(struct mic_info *mic, char *state)
1335	{
1336		if (!strcmp(state, "offline"))
1337			return MIC_OFFLINE;
1338		if (!strcmp(state, "online"))
1339			return MIC_ONLINE;
1340		if (!strcmp(state, "shutting_down"))
1341			return MIC_SHUTTING_DOWN;
1342		if (!strcmp(state, "reset_failed"))
1343			return MIC_RESET_FAILED;
1344		if (!strcmp(state, "suspending"))
1345			return MIC_SUSPENDING;
1346		if (!strcmp(state, "suspended"))
1347			return MIC_SUSPENDED;
1348		mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1349		/* Invalid state */
1350		assert(0);
1351	};
1352	
1353	static void mic_handle_shutdown(struct mic_info *mic)
1354	{
1355	#define SHUTDOWN_TIMEOUT 60
1356		int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1357		char *shutdown_status;
1358		while (i) {
1359			shutdown_status = readsysfs(mic->name, "shutdown_status");
1360			if (!shutdown_status)
1361				continue;
1362			mpsslog("%s: %s %d shutdown_status %s\n",
1363				mic->name, __func__, __LINE__, shutdown_status);
1364			switch (get_mic_shutdown_status(mic, shutdown_status)) {
1365			case MIC_RESTART:
1366				mic->restart = 1;
1367			case MIC_HALTED:
1368			case MIC_POWER_OFF:
1369			case MIC_CRASHED:
1370				free(shutdown_status);
1371				goto reset;
1372			default:
1373				break;
1374			}
1375			free(shutdown_status);
1376			sleep(1);
1377			i--;
1378		}
1379	reset:
1380		ret = kill(mic->pid, SIGTERM);
1381		mpsslog("%s: %s %d kill pid %d ret %d\n",
1382			mic->name, __func__, __LINE__,
1383			mic->pid, ret);
1384		if (!ret) {
1385			ret = waitpid(mic->pid, &stat,
1386				WIFSIGNALED(stat));
1387			mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1388				mic->name, __func__, __LINE__,
1389				ret, mic->pid);
1390		}
1391		if (ret == mic->pid)
1392			reset(mic);
1393	}
1394	
1395	static void *
1396	mic_config(void *arg)
1397	{
1398		struct mic_info *mic = (struct mic_info *)arg;
1399		char *state = NULL;
1400		char pathname[PATH_MAX];
1401		int fd, ret;
1402		struct pollfd ufds[1];
1403		char value[4096];
1404	
1405		snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1406			 MICSYSFSDIR, mic->name, "state");
1407	
1408		fd = open(pathname, O_RDONLY);
1409		if (fd < 0) {
1410			mpsslog("%s: opening file %s failed %s\n",
1411				mic->name, pathname, strerror(errno));
1412			goto error;
1413		}
1414	
1415		do {
1416			ret = lseek(fd, 0, SEEK_SET);
1417			if (ret < 0) {
1418				mpsslog("%s: Failed to seek to file start '%s': %s\n",
1419					mic->name, pathname, strerror(errno));
1420				goto close_error1;
1421			}
1422			ret = read(fd, value, sizeof(value));
1423			if (ret < 0) {
1424				mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1425					mic->name, pathname, strerror(errno));
1426				goto close_error1;
1427			}
1428	retry:
1429			state = readsysfs(mic->name, "state");
1430			if (!state)
1431				goto retry;
1432			mpsslog("%s: %s %d state %s\n",
1433				mic->name, __func__, __LINE__, state);
1434			switch (get_mic_state(mic, state)) {
1435			case MIC_SHUTTING_DOWN:
1436				mic_handle_shutdown(mic);
1437				goto close_error;
1438			case MIC_SUSPENDING:
1439				mic->boot_on_resume = 1;
1440				setsysfs(mic->name, "state", "suspend");
1441				mic_handle_shutdown(mic);
1442				goto close_error;
1443			case MIC_OFFLINE:
1444				if (mic->boot_on_resume) {
1445					setsysfs(mic->name, "state", "boot");
1446					mic->boot_on_resume = 0;
1447				}
1448				break;
1449			default:
1450				break;
1451			}
1452			free(state);
1453	
1454			ufds[0].fd = fd;
1455			ufds[0].events = POLLERR | POLLPRI;
1456			ret = poll(ufds, 1, -1);
1457			if (ret < 0) {
1458				mpsslog("%s: poll failed %s\n",
1459					mic->name, strerror(errno));
1460				goto close_error1;
1461			}
1462		} while (1);
1463	close_error:
1464		free(state);
1465	close_error1:
1466		close(fd);
1467	error:
1468		init_mic(mic);
1469		pthread_exit(NULL);
1470	}
1471	
1472	static void
1473	set_cmdline(struct mic_info *mic)
1474	{
1475		char buffer[PATH_MAX];
1476		int len;
1477	
1478		len = snprintf(buffer, PATH_MAX,
1479			"clocksource=tsc highres=off nohz=off ");
1480		len += snprintf(buffer + len, PATH_MAX - len,
1481			"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1482		len += snprintf(buffer + len, PATH_MAX - len,
1483			"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1484			mic->id);
1485	
1486		setsysfs(mic->name, "cmdline", buffer);
1487		mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1488		snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1489		mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1490	}
1491	
1492	static void
1493	set_log_buf_info(struct mic_info *mic)
1494	{
1495		int fd;
1496		off_t len;
1497		char system_map[] = "/lib/firmware/mic/System.map";
1498		char *map, *temp, log_buf[17] = {'\0'};
1499	
1500		fd = open(system_map, O_RDONLY);
1501		if (fd < 0) {
1502			mpsslog("%s: Opening System.map failed: %d\n",
1503				mic->name, errno);
1504			return;
1505		}
1506		len = lseek(fd, 0, SEEK_END);
1507		if (len < 0) {
1508			mpsslog("%s: Reading System.map size failed: %d\n",
1509				mic->name, errno);
1510			close(fd);
1511			return;
1512		}
1513		map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1514		if (map == MAP_FAILED) {
1515			mpsslog("%s: mmap of System.map failed: %d\n",
1516				mic->name, errno);
1517			close(fd);
1518			return;
1519		}
1520		temp = strstr(map, "__log_buf");
1521		if (!temp) {
1522			mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1523			munmap(map, len);
1524			close(fd);
1525			return;
1526		}
1527		strncpy(log_buf, temp - 19, 16);
1528		setsysfs(mic->name, "log_buf_addr", log_buf);
1529		mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1530		temp = strstr(map, "log_buf_len");
1531		if (!temp) {
1532			mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1533			munmap(map, len);
1534			close(fd);
1535			return;
1536		}
1537		strncpy(log_buf, temp - 19, 16);
1538		setsysfs(mic->name, "log_buf_len", log_buf);
1539		mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1540		munmap(map, len);
1541		close(fd);
1542	}
1543	
1544	static void init_mic(struct mic_info *mic);
1545	
1546	static void
1547	change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1548	{
1549		struct mic_info *mic;
1550	
1551		for (mic = mic_list.next; mic != NULL; mic = mic->next)
1552			mic->mic_virtblk.signaled = 1/* true */;
1553	}
1554	
1555	static void
1556	init_mic(struct mic_info *mic)
1557	{
1558		struct sigaction ignore = {
1559			.sa_flags = 0,
1560			.sa_handler = SIG_IGN
1561		};
1562		struct sigaction act = {
1563			.sa_flags = SA_SIGINFO,
1564			.sa_sigaction = change_virtblk_backend,
1565		};
1566		char buffer[PATH_MAX];
1567		int err;
1568	
1569		/*
1570		 * Currently, one virtio block device is supported for each MIC card
1571		 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1572		 * The signal informs the virtio block backend about a change in the
1573		 * configuration file which specifies the virtio backend file name on
1574		 * the host. Virtio block backend then re-reads the configuration file
1575		 * and switches to the new block device. This signalling mechanism may
1576		 * not be required once multiple virtio block devices are supported by
1577		 * the MIC daemon.
1578		 */
1579		sigaction(SIGUSR1, &ignore, NULL);
1580	
1581		mic->pid = fork();
1582		switch (mic->pid) {
1583		case 0:
1584			set_log_buf_info(mic);
1585			set_cmdline(mic);
1586			add_virtio_device(mic, &virtcons_dev_page.dd);
1587			add_virtio_device(mic, &virtnet_dev_page.dd);
1588			err = pthread_create(&mic->mic_console.console_thread, NULL,
1589				virtio_console, mic);
1590			if (err)
1591				mpsslog("%s virtcons pthread_create failed %s\n",
1592					mic->name, strerror(err));
1593			err = pthread_create(&mic->mic_net.net_thread, NULL,
1594				virtio_net, mic);
1595			if (err)
1596				mpsslog("%s virtnet pthread_create failed %s\n",
1597					mic->name, strerror(err));
1598			err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1599				virtio_block, mic);
1600			if (err)
1601				mpsslog("%s virtblk pthread_create failed %s\n",
1602					mic->name, strerror(err));
1603			sigemptyset(&act.sa_mask);
1604			err = sigaction(SIGUSR1, &act, NULL);
1605			if (err)
1606				mpsslog("%s sigaction SIGUSR1 failed %s\n",
1607					mic->name, strerror(errno));
1608			while (1)
1609				sleep(60);
1610		case -1:
1611			mpsslog("fork failed MIC name %s id %d errno %d\n",
1612				mic->name, mic->id, errno);
1613			break;
1614		default:
1615			if (mic->restart) {
1616				snprintf(buffer, PATH_MAX, "boot");
1617				setsysfs(mic->name, "state", buffer);
1618				mpsslog("%s restarting mic %d\n",
1619					mic->name, mic->restart);
1620				mic->restart = 0;
1621			}
1622			pthread_create(&mic->config_thread, NULL, mic_config, mic);
1623		}
1624	}
1625	
1626	static void
1627	start_daemon(void)
1628	{
1629		struct mic_info *mic;
1630	
1631		for (mic = mic_list.next; mic != NULL; mic = mic->next)
1632			init_mic(mic);
1633	
1634		while (1)
1635			sleep(60);
1636	}
1637	
1638	static int
1639	init_mic_list(void)
1640	{
1641		struct mic_info *mic = &mic_list;
1642		struct dirent *file;
1643		DIR *dp;
1644		int cnt = 0;
1645	
1646		dp = opendir(MICSYSFSDIR);
1647		if (!dp)
1648			return 0;
1649	
1650		while ((file = readdir(dp)) != NULL) {
1651			if (!strncmp(file->d_name, "mic", 3)) {
1652				mic->next = calloc(1, sizeof(struct mic_info));
1653				if (mic->next) {
1654					mic = mic->next;
1655					mic->id = atoi(&file->d_name[3]);
1656					mic->name = malloc(strlen(file->d_name) + 16);
1657					if (mic->name)
1658						strcpy(mic->name, file->d_name);
1659					mpsslog("MIC name %s id %d\n", mic->name,
1660						mic->id);
1661					cnt++;
1662				}
1663			}
1664		}
1665	
1666		closedir(dp);
1667		return cnt;
1668	}
1669	
1670	void
1671	mpsslog(char *format, ...)
1672	{
1673		va_list args;
1674		char buffer[4096];
1675		char ts[52], *ts1;
1676		time_t t;
1677	
1678		if (logfp == NULL)
1679			return;
1680	
1681		va_start(args, format);
1682		vsprintf(buffer, format, args);
1683		va_end(args);
1684	
1685		time(&t);
1686		ts1 = ctime_r(&t, ts);
1687		ts1[strlen(ts1) - 1] = '\0';
1688		fprintf(logfp, "%s: %s", ts1, buffer);
1689	
1690		fflush(logfp);
1691	}
1692	
1693	int
1694	main(int argc, char *argv[])
1695	{
1696		int cnt;
1697		pid_t pid;
1698	
1699		myname = argv[0];
1700	
1701		logfp = fopen(LOGFILE_NAME, "a+");
1702		if (!logfp) {
1703			fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1704			exit(1);
1705		}
1706		pid = fork();
1707		switch (pid) {
1708		case 0:
1709			break;
1710		case -1:
1711			exit(2);
1712		default:
1713			exit(0);
1714		}
1715	
1716		mpsslog("MIC Daemon start\n");
1717	
1718		cnt = init_mic_list();
1719		if (cnt == 0) {
1720			mpsslog("MIC module not loaded\n");
1721			exit(3);
1722		}
1723		mpsslog("MIC found %d devices\n", cnt);
1724	
1725		start_daemon();
1726	
1727		exit(0);
1728	}
Hide Line Numbers
About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Information is copyright its respective author. All material is available from the Linux Kernel Source distributed under a GPL License. This page is provided as a free service by mjmwired.net.