About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Documentation / mic / mpssd / mpssd.c




Custom Search

Based on kernel version 4.7.2. Page generated on 2016-08-22 22:46 EST.

1	/*
2	 * Intel MIC Platform Software Stack (MPSS)
3	 *
4	 * Copyright(c) 2013 Intel Corporation.
5	 *
6	 * This program is free software; you can redistribute it and/or modify
7	 * it under the terms of the GNU General Public License, version 2, as
8	 * published by the Free Software Foundation.
9	 *
10	 * This program is distributed in the hope that it will be useful, but
11	 * WITHOUT ANY WARRANTY; without even the implied warranty of
12	 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	 * General Public License for more details.
14	 *
15	 * The full GNU General Public License is included in this distribution in
16	 * the file called "COPYING".
17	 *
18	 * Intel MIC User Space Tools.
19	 */
20	
21	#define _GNU_SOURCE
22	
23	#include <stdlib.h>
24	#include <fcntl.h>
25	#include <getopt.h>
26	#include <assert.h>
27	#include <unistd.h>
28	#include <stdbool.h>
29	#include <signal.h>
30	#include <poll.h>
31	#include <features.h>
32	#include <sys/types.h>
33	#include <sys/stat.h>
34	#include <sys/mman.h>
35	#include <sys/socket.h>
36	#include <linux/virtio_ring.h>
37	#include <linux/virtio_net.h>
38	#include <linux/virtio_console.h>
39	#include <linux/virtio_blk.h>
40	#include <linux/version.h>
41	#include "mpssd.h"
42	#include <linux/mic_ioctl.h>
43	#include <linux/mic_common.h>
44	#include <tools/endian.h>
45	
46	static void *init_mic(void *arg);
47	
48	static FILE *logfp;
49	static struct mic_info mic_list;
50	
/* Number of elements in a true array (meaningless when applied to a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to 'type'.
 * Each operand is evaluated exactly once.
 */
#define min_t(type, x, y) ({				\
		type __min1 = (x);                      \
		type __min2 = (y);                      \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * Align addr on a size boundary - adjust address up/down if needed.
 * The mask arithmetic only works when size is a power of two.
 * Every argument is parenthesized so that expressions such as
 * "a | b" or "n << 1" can be passed safely.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & ~((size) - 1))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN((addr), PAGE_SIZE)

/*
 * Force a single volatile access to x. __typeof__ is used instead of
 * typeof so the macro also builds in strict ISO C modes.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest packet buffer: GSO payload plus header, rounded up to 64 bytes */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
79	
80	static struct {
81		struct mic_device_desc dd;
82		struct mic_vqconfig vqconfig[2];
83		__u32 host_features, guest_acknowledgements;
84		struct virtio_console_config cons_config;
85	} virtcons_dev_page = {
86		.dd = {
87			.type = VIRTIO_ID_CONSOLE,
88			.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89			.feature_len = sizeof(virtcons_dev_page.host_features),
90			.config_len = sizeof(virtcons_dev_page.cons_config),
91		},
92		.vqconfig[0] = {
93			.num = htole16(MIC_VRING_ENTRIES),
94		},
95		.vqconfig[1] = {
96			.num = htole16(MIC_VRING_ENTRIES),
97		},
98	};
99	
100	static struct {
101		struct mic_device_desc dd;
102		struct mic_vqconfig vqconfig[2];
103		__u32 host_features, guest_acknowledgements;
104		struct virtio_net_config net_config;
105	} virtnet_dev_page = {
106		.dd = {
107			.type = VIRTIO_ID_NET,
108			.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109			.feature_len = sizeof(virtnet_dev_page.host_features),
110			.config_len = sizeof(virtnet_dev_page.net_config),
111		},
112		.vqconfig[0] = {
113			.num = htole16(MIC_VRING_ENTRIES),
114		},
115		.vqconfig[1] = {
116			.num = htole16(MIC_VRING_ENTRIES),
117		},
118	#if GSO_ENABLED
119		.host_features = htole32(
120			1 << VIRTIO_NET_F_CSUM |
121			1 << VIRTIO_NET_F_GSO |
122			1 << VIRTIO_NET_F_GUEST_TSO4 |
123			1 << VIRTIO_NET_F_GUEST_TSO6 |
124			1 << VIRTIO_NET_F_GUEST_ECN),
125	#else
126			.host_features = 0,
127	#endif
128	};
129	
130	static const char *mic_config_dir = "/etc/mpss";
131	static const char *virtblk_backend = "VIRTBLK_BACKEND";
132	static struct {
133		struct mic_device_desc dd;
134		struct mic_vqconfig vqconfig[1];
135		__u32 host_features, guest_acknowledgements;
136		struct virtio_blk_config blk_config;
137	} virtblk_dev_page = {
138		.dd = {
139			.type = VIRTIO_ID_BLOCK,
140			.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141			.feature_len = sizeof(virtblk_dev_page.host_features),
142			.config_len = sizeof(virtblk_dev_page.blk_config),
143		},
144		.vqconfig[0] = {
145			.num = htole16(MIC_VRING_ENTRIES),
146		},
147		.host_features =
148			htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149		.blk_config = {
150			.seg_max = htole32(MIC_VRING_ENTRIES - 2),
151			.capacity = htole64(0),
152		 }
153	};
154	
155	static char *myname;
156	
157	static int
158	tap_configure(struct mic_info *mic, char *dev)
159	{
160		pid_t pid;
161		char *ifargv[7];
162		char ipaddr[IFNAMSIZ];
163		int ret = 0;
164	
165		pid = fork();
166		if (pid == 0) {
167			ifargv[0] = "ip";
168			ifargv[1] = "link";
169			ifargv[2] = "set";
170			ifargv[3] = dev;
171			ifargv[4] = "up";
172			ifargv[5] = NULL;
173			mpsslog("Configuring %s\n", dev);
174			ret = execvp("ip", ifargv);
175			if (ret < 0) {
176				mpsslog("%s execvp failed errno %s\n",
177					mic->name, strerror(errno));
178				return ret;
179			}
180		}
181		if (pid < 0) {
182			mpsslog("%s fork failed errno %s\n",
183				mic->name, strerror(errno));
184			return ret;
185		}
186	
187		ret = waitpid(pid, NULL, 0);
188		if (ret < 0) {
189			mpsslog("%s waitpid failed errno %s\n",
190				mic->name, strerror(errno));
191			return ret;
192		}
193	
194		snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
195	
196		pid = fork();
197		if (pid == 0) {
198			ifargv[0] = "ip";
199			ifargv[1] = "addr";
200			ifargv[2] = "add";
201			ifargv[3] = ipaddr;
202			ifargv[4] = "dev";
203			ifargv[5] = dev;
204			ifargv[6] = NULL;
205			mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206			ret = execvp("ip", ifargv);
207			if (ret < 0) {
208				mpsslog("%s execvp failed errno %s\n",
209					mic->name, strerror(errno));
210				return ret;
211			}
212		}
213		if (pid < 0) {
214			mpsslog("%s fork failed errno %s\n",
215				mic->name, strerror(errno));
216			return ret;
217		}
218	
219		ret = waitpid(pid, NULL, 0);
220		if (ret < 0) {
221			mpsslog("%s waitpid failed errno %s\n",
222				mic->name, strerror(errno));
223			return ret;
224		}
225		mpsslog("MIC name %s %s %d DONE!\n",
226			mic->name, __func__, __LINE__);
227		return 0;
228	}
229	
230	static int tun_alloc(struct mic_info *mic, char *dev)
231	{
232		struct ifreq ifr;
233		int fd, err;
234	#if GSO_ENABLED
235		unsigned offload;
236	#endif
237		fd = open("/dev/net/tun", O_RDWR);
238		if (fd < 0) {
239			mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240			goto done;
241		}
242	
243		memset(&ifr, 0, sizeof(ifr));
244	
245		ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246		if (*dev)
247			strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248	
249		err = ioctl(fd, TUNSETIFF, (void *)&ifr);
250		if (err < 0) {
251			mpsslog("%s %s %d TUNSETIFF failed %s\n",
252				mic->name, __func__, __LINE__, strerror(errno));
253			close(fd);
254			return err;
255		}
256	#if GSO_ENABLED
257		offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
258	
259		err = ioctl(fd, TUNSETOFFLOAD, offload);
260		if (err < 0) {
261			mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
262				mic->name, __func__, __LINE__, strerror(errno));
263			close(fd);
264			return err;
265		}
266	#endif
267		strcpy(dev, ifr.ifr_name);
268		mpsslog("Created TAP %s\n", dev);
269	done:
270		return fd;
271	}
272	
/* Indices into the pollfd array used by virtio_net() */
#define NET_FD_VIRTIO_NET	0
#define NET_FD_TUN		(NET_FD_VIRTIO_NET + 1)
#define MAX_NET_FD		(NET_FD_TUN + 1)  /* must be the last one + 1 */
276	
277	static void set_dp(struct mic_info *mic, int type, void *dp)
278	{
279		switch (type) {
280		case VIRTIO_ID_CONSOLE:
281			mic->mic_console.console_dp = dp;
282			return;
283		case VIRTIO_ID_NET:
284			mic->mic_net.net_dp = dp;
285			return;
286		case VIRTIO_ID_BLOCK:
287			mic->mic_virtblk.block_dp = dp;
288			return;
289		}
290		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
291		assert(0);
292	}
293	
294	static void *get_dp(struct mic_info *mic, int type)
295	{
296		switch (type) {
297		case VIRTIO_ID_CONSOLE:
298			return mic->mic_console.console_dp;
299		case VIRTIO_ID_NET:
300			return mic->mic_net.net_dp;
301		case VIRTIO_ID_BLOCK:
302			return mic->mic_virtblk.block_dp;
303		}
304		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
305		assert(0);
306		return NULL;
307	}
308	
309	static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
310	{
311		struct mic_device_desc *d;
312		int i;
313		void *dp = get_dp(mic, type);
314	
315		for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
316			i += mic_total_desc_size(d)) {
317			d = dp + i;
318	
319			/* End of list */
320			if (d->type == 0)
321				break;
322	
323			if (d->type == -1)
324				continue;
325	
326			mpsslog("%s %s d-> type %d d %p\n",
327				mic->name, __func__, d->type, d);
328	
329			if (d->type == (__u8)type)
330				return d;
331		}
332		mpsslog("%s %s %d not found\n", mic->name, __func__, type);
333		return NULL;
334	}
335	
336	/* See comments in vhost.c for explanation of next_desc() */
337	static unsigned next_desc(struct vring_desc *desc)
338	{
339		unsigned int next;
340	
341		if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
342			return -1U;
343		next = le16toh(desc->next);
344		return next;
345	}
346	
347	/* Sum up all the IOVEC length */
348	static ssize_t
349	sum_iovec_len(struct mic_copy_desc *copy)
350	{
351		ssize_t sum = 0;
352		unsigned int i;
353	
354		for (i = 0; i < copy->iovcnt; i++)
355			sum += copy->iov[i].iov_len;
356		return sum;
357	}
358	
359	static inline void verify_out_len(struct mic_info *mic,
360		struct mic_copy_desc *copy)
361	{
362		if (copy->out_len != sum_iovec_len(copy)) {
363			mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
364				mic->name, __func__, __LINE__,
365				copy->out_len, sum_iovec_len(copy));
366			assert(copy->out_len == sum_iovec_len(copy));
367		}
368	}
369	
370	/* Display an iovec */
371	static void
372	disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
373		   const char *s, int line)
374	{
375		unsigned int i;
376	
377		for (i = 0; i < copy->iovcnt; i++)
378			mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
379				mic->name, s, line, i,
380				copy->iov[i].iov_base, copy->iov[i].iov_len);
381	}
382	
383	static inline __u16 read_avail_idx(struct mic_vring *vr)
384	{
385		return ACCESS_ONCE(vr->info->avail_idx);
386	}
387	
388	static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
389					struct mic_copy_desc *copy, ssize_t len)
390	{
391		copy->vr_idx = tx ? 0 : 1;
392		copy->update_used = true;
393		if (type == VIRTIO_ID_NET)
394			copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
395		else
396			copy->iov[0].iov_len = len;
397	}
398	
399	/* Central API which triggers the copies */
400	static int
401	mic_virtio_copy(struct mic_info *mic, int fd,
402			struct mic_vring *vr, struct mic_copy_desc *copy)
403	{
404		int ret;
405	
406		ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
407		if (ret) {
408			mpsslog("%s %s %d errno %s ret %d\n",
409				mic->name, __func__, __LINE__,
410				strerror(errno), ret);
411		}
412		return ret;
413	}
414	
415	static inline unsigned _vring_size(unsigned int num, unsigned long align)
416	{
417		return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
418					+ align - 1) & ~(align - 1))
419			+ sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
420	}
421	
422	/*
423	 * This initialization routine requires at least one
424	 * vring i.e. vr0. vr1 is optional.
425	 */
426	static void *
427	init_vr(struct mic_info *mic, int fd, int type,
428		struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
429	{
430		int vr_size;
431		char *va;
432	
433		vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
434						 MIC_VIRTIO_RING_ALIGN) +
435				     sizeof(struct _mic_vring_info));
436		va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
437			PROT_READ, MAP_SHARED, fd, 0);
438		if (MAP_FAILED == va) {
439			mpsslog("%s %s %d mmap failed errno %s\n",
440				mic->name, __func__, __LINE__,
441				strerror(errno));
442			goto done;
443		}
444		set_dp(mic, type, va);
445		vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
446		vr0->info = vr0->va +
447			_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
448		vring_init(&vr0->vr,
449			   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
450		mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
451			__func__, mic->name, vr0->va, vr0->info, vr_size,
452			_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
453		mpsslog("magic 0x%x expected 0x%x\n",
454			le32toh(vr0->info->magic), MIC_MAGIC + type);
455		assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
456		if (vr1) {
457			vr1->va = (struct mic_vring *)
458				&va[MIC_DEVICE_PAGE_END + vr_size];
459			vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
460				MIC_VIRTIO_RING_ALIGN);
461			vring_init(&vr1->vr,
462				   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
463			mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
464				__func__, mic->name, vr1->va, vr1->info, vr_size,
465				_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
466			mpsslog("magic 0x%x expected 0x%x\n",
467				le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
468			assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
469		}
470	done:
471		return va;
472	}
473	
474	static int
475	wait_for_card_driver(struct mic_info *mic, int fd, int type)
476	{
477		struct pollfd pollfd;
478		int err;
479		struct mic_device_desc *desc = get_device_desc(mic, type);
480		__u8 prev_status;
481	
482		if (!desc)
483			return -ENODEV;
484		prev_status = desc->status;
485		pollfd.fd = fd;
486		mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
487			mic->name, __func__, type, desc->status);
488	
489		while (1) {
490			pollfd.events = POLLIN;
491			pollfd.revents = 0;
492			err = poll(&pollfd, 1, -1);
493			if (err < 0) {
494				mpsslog("%s %s poll failed %s\n",
495					mic->name, __func__, strerror(errno));
496				continue;
497			}
498	
499			if (pollfd.revents) {
500				if (desc->status != prev_status) {
501					mpsslog("%s %s Waiting... desc-> type %d "
502						"status 0x%x\n",
503						mic->name, __func__, type,
504						desc->status);
505					prev_status = desc->status;
506				}
507				if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
508					mpsslog("%s %s poll.revents %d\n",
509						mic->name, __func__, pollfd.revents);
510					mpsslog("%s %s desc-> type %d status 0x%x\n",
511						mic->name, __func__, type,
512						desc->status);
513					break;
514				}
515			}
516		}
517		return 0;
518	}
519	
520	/* Spin till we have some descriptors */
521	static void
522	spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
523	{
524		__u16 avail_idx = read_avail_idx(vr);
525	
526		while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
527	#ifdef DEBUG
528			mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
529				mic->name, __func__,
530				le16toh(vr->vr.avail->idx), vr->info->avail_idx);
531	#endif
532			sched_yield();
533		}
534	}
535	
536	static void *
537	virtio_net(void *arg)
538	{
539		static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
540		static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
541		struct iovec vnet_iov[2][2] = {
542			{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
543			  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
544			{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
545			  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
546		};
547		struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
548		struct mic_info *mic = (struct mic_info *)arg;
549		char if_name[IFNAMSIZ];
550		struct pollfd net_poll[MAX_NET_FD];
551		struct mic_vring tx_vr, rx_vr;
552		struct mic_copy_desc copy;
553		struct mic_device_desc *desc;
554		int err;
555	
556		snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
557		mic->mic_net.tap_fd = tun_alloc(mic, if_name);
558		if (mic->mic_net.tap_fd < 0)
559			goto done;
560	
561		if (tap_configure(mic, if_name))
562			goto done;
563		mpsslog("MIC name %s id %d\n", mic->name, mic->id);
564	
565		net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
566		net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
567		net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
568		net_poll[NET_FD_TUN].events = POLLIN;
569	
570		if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
571					  VIRTIO_ID_NET, &tx_vr, &rx_vr,
572			virtnet_dev_page.dd.num_vq)) {
573			mpsslog("%s init_vr failed %s\n",
574				mic->name, strerror(errno));
575			goto done;
576		}
577	
578		copy.iovcnt = 2;
579		desc = get_device_desc(mic, VIRTIO_ID_NET);
580	
581		while (1) {
582			ssize_t len;
583	
584			net_poll[NET_FD_VIRTIO_NET].revents = 0;
585			net_poll[NET_FD_TUN].revents = 0;
586	
587			/* Start polling for data from tap and virtio net */
588			err = poll(net_poll, 2, -1);
589			if (err < 0) {
590				mpsslog("%s poll failed %s\n",
591					__func__, strerror(errno));
592				continue;
593			}
594			if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
595				err = wait_for_card_driver(mic,
596							   mic->mic_net.virtio_net_fd,
597							   VIRTIO_ID_NET);
598				if (err) {
599					mpsslog("%s %s %d Exiting...\n",
600						mic->name, __func__, __LINE__);
601					break;
602				}
603			}
604			/*
605			 * Check if there is data to be read from TUN and write to
606			 * virtio net fd if there is.
607			 */
608			if (net_poll[NET_FD_TUN].revents & POLLIN) {
609				copy.iov = iov0;
610				len = readv(net_poll[NET_FD_TUN].fd,
611					copy.iov, copy.iovcnt);
612				if (len > 0) {
613					struct virtio_net_hdr *hdr
614						= (struct virtio_net_hdr *)vnet_hdr[0];
615	
616					/* Disable checksums on the card since we are on
617					   a reliable PCIe link */
618					hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
619	#ifdef DEBUG
620					mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
621						__func__, __LINE__, hdr->flags);
622					mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
623						copy.out_len, hdr->gso_type);
624	#endif
625	#ifdef DEBUG
626					disp_iovec(mic, copy, __func__, __LINE__);
627					mpsslog("%s %s %d read from tap 0x%lx\n",
628						mic->name, __func__, __LINE__,
629						len);
630	#endif
631					spin_for_descriptors(mic, &tx_vr);
632					txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
633						     len);
634	
635					err = mic_virtio_copy(mic,
636						mic->mic_net.virtio_net_fd, &tx_vr,
637						&copy);
638					if (err < 0) {
639						mpsslog("%s %s %d mic_virtio_copy %s\n",
640							mic->name, __func__, __LINE__,
641							strerror(errno));
642					}
643					if (!err)
644						verify_out_len(mic, &copy);
645	#ifdef DEBUG
646					disp_iovec(mic, copy, __func__, __LINE__);
647					mpsslog("%s %s %d wrote to net 0x%lx\n",
648						mic->name, __func__, __LINE__,
649						sum_iovec_len(&copy));
650	#endif
651					/* Reinitialize IOV for next run */
652					iov0[1].iov_len = MAX_NET_PKT_SIZE;
653				} else if (len < 0) {
654					disp_iovec(mic, &copy, __func__, __LINE__);
655					mpsslog("%s %s %d read failed %s ", mic->name,
656						__func__, __LINE__, strerror(errno));
657					mpsslog("cnt %d sum %zd\n",
658						copy.iovcnt, sum_iovec_len(&copy));
659				}
660			}
661	
662			/*
663			 * Check if there is data to be read from virtio net and
664			 * write to TUN if there is.
665			 */
666			if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
667				while (rx_vr.info->avail_idx !=
668					le16toh(rx_vr.vr.avail->idx)) {
669					copy.iov = iov1;
670					txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
671						     MAX_NET_PKT_SIZE
672						+ sizeof(struct virtio_net_hdr));
673	
674					err = mic_virtio_copy(mic,
675						mic->mic_net.virtio_net_fd, &rx_vr,
676						&copy);
677					if (!err) {
678	#ifdef DEBUG
679						struct virtio_net_hdr *hdr
680							= (struct virtio_net_hdr *)
681								vnet_hdr[1];
682	
683						mpsslog("%s %s %d hdr->flags 0x%x, ",
684							mic->name, __func__, __LINE__,
685							hdr->flags);
686						mpsslog("out_len %d gso_type 0x%x\n",
687							copy.out_len,
688							hdr->gso_type);
689	#endif
690						/* Set the correct output iov_len */
691						iov1[1].iov_len = copy.out_len -
692							sizeof(struct virtio_net_hdr);
693						verify_out_len(mic, &copy);
694	#ifdef DEBUG
695						disp_iovec(mic, copy, __func__,
696							   __LINE__);
697						mpsslog("%s %s %d ",
698							mic->name, __func__, __LINE__);
699						mpsslog("read from net 0x%lx\n",
700							sum_iovec_len(copy));
701	#endif
702						len = writev(net_poll[NET_FD_TUN].fd,
703							copy.iov, copy.iovcnt);
704						if (len != sum_iovec_len(&copy)) {
705							mpsslog("Tun write failed %s ",
706								strerror(errno));
707							mpsslog("len 0x%zx ", len);
708							mpsslog("read_len 0x%zx\n",
709								sum_iovec_len(&copy));
710						} else {
711	#ifdef DEBUG
712							disp_iovec(mic, &copy, __func__,
713								   __LINE__);
714							mpsslog("%s %s %d ",
715								mic->name, __func__,
716								__LINE__);
717							mpsslog("wrote to tap 0x%lx\n",
718								len);
719	#endif
720						}
721					} else {
722						mpsslog("%s %s %d mic_virtio_copy %s\n",
723							mic->name, __func__, __LINE__,
724							strerror(errno));
725						break;
726					}
727				}
728			}
729			if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
730				mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
731		}
732	done:
733		pthread_exit(NULL);
734	}
735	
/* virtio_console: indices into the pollfd array used by virtio_console() */
#define VIRTIO_CONSOLE_FD	0
#define MONITOR_FD		1
#define MAX_CONSOLE_FD		2	/* must be the last one + 1 */
#define MAX_BUFFER_SIZE		PAGE_SIZE
741	
742	static void *
743	virtio_console(void *arg)
744	{
745		static __u8 vcons_buf[2][PAGE_SIZE];
746		struct iovec vcons_iov[2] = {
747			{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
748			{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
749		};
750		struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
751		struct mic_info *mic = (struct mic_info *)arg;
752		int err;
753		struct pollfd console_poll[MAX_CONSOLE_FD];
754		int pty_fd;
755		char *pts_name;
756		ssize_t len;
757		struct mic_vring tx_vr, rx_vr;
758		struct mic_copy_desc copy;
759		struct mic_device_desc *desc;
760	
761		pty_fd = posix_openpt(O_RDWR);
762		if (pty_fd < 0) {
763			mpsslog("can't open a pseudoterminal master device: %s\n",
764				strerror(errno));
765			goto _return;
766		}
767		pts_name = ptsname(pty_fd);
768		if (pts_name == NULL) {
769			mpsslog("can't get pts name\n");
770			goto _close_pty;
771		}
772		printf("%s console message goes to %s\n", mic->name, pts_name);
773		mpsslog("%s console message goes to %s\n", mic->name, pts_name);
774		err = grantpt(pty_fd);
775		if (err < 0) {
776			mpsslog("can't grant access: %s %s\n",
777				pts_name, strerror(errno));
778			goto _close_pty;
779		}
780		err = unlockpt(pty_fd);
781		if (err < 0) {
782			mpsslog("can't unlock a pseudoterminal: %s %s\n",
783				pts_name, strerror(errno));
784			goto _close_pty;
785		}
786		console_poll[MONITOR_FD].fd = pty_fd;
787		console_poll[MONITOR_FD].events = POLLIN;
788	
789		console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
790		console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
791	
792		if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
793					  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
794			virtcons_dev_page.dd.num_vq)) {
795			mpsslog("%s init_vr failed %s\n",
796				mic->name, strerror(errno));
797			goto _close_pty;
798		}
799	
800		copy.iovcnt = 1;
801		desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
802	
803		for (;;) {
804			console_poll[MONITOR_FD].revents = 0;
805			console_poll[VIRTIO_CONSOLE_FD].revents = 0;
806			err = poll(console_poll, MAX_CONSOLE_FD, -1);
807			if (err < 0) {
808				mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
809					strerror(errno));
810				continue;
811			}
812			if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
813				err = wait_for_card_driver(mic,
814						mic->mic_console.virtio_console_fd,
815						VIRTIO_ID_CONSOLE);
816				if (err) {
817					mpsslog("%s %s %d Exiting...\n",
818						mic->name, __func__, __LINE__);
819					break;
820				}
821			}
822	
823			if (console_poll[MONITOR_FD].revents & POLLIN) {
824				copy.iov = iov0;
825				len = readv(pty_fd, copy.iov, copy.iovcnt);
826				if (len > 0) {
827	#ifdef DEBUG
828					disp_iovec(mic, copy, __func__, __LINE__);
829					mpsslog("%s %s %d read from tap 0x%lx\n",
830						mic->name, __func__, __LINE__,
831						len);
832	#endif
833					spin_for_descriptors(mic, &tx_vr);
834					txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
835						     &copy, len);
836	
837					err = mic_virtio_copy(mic,
838						mic->mic_console.virtio_console_fd,
839						&tx_vr, &copy);
840					if (err < 0) {
841						mpsslog("%s %s %d mic_virtio_copy %s\n",
842							mic->name, __func__, __LINE__,
843							strerror(errno));
844					}
845					if (!err)
846						verify_out_len(mic, &copy);
847	#ifdef DEBUG
848					disp_iovec(mic, copy, __func__, __LINE__);
849					mpsslog("%s %s %d wrote to net 0x%lx\n",
850						mic->name, __func__, __LINE__,
851						sum_iovec_len(copy));
852	#endif
853					/* Reinitialize IOV for next run */
854					iov0->iov_len = PAGE_SIZE;
855				} else if (len < 0) {
856					disp_iovec(mic, &copy, __func__, __LINE__);
857					mpsslog("%s %s %d read failed %s ",
858						mic->name, __func__, __LINE__,
859						strerror(errno));
860					mpsslog("cnt %d sum %zd\n",
861						copy.iovcnt, sum_iovec_len(&copy));
862				}
863			}
864	
865			if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
866				while (rx_vr.info->avail_idx !=
867					le16toh(rx_vr.vr.avail->idx)) {
868					copy.iov = iov1;
869					txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
870						     &copy, PAGE_SIZE);
871	
872					err = mic_virtio_copy(mic,
873						mic->mic_console.virtio_console_fd,
874						&rx_vr, &copy);
875					if (!err) {
876						/* Set the correct output iov_len */
877						iov1->iov_len = copy.out_len;
878						verify_out_len(mic, &copy);
879	#ifdef DEBUG
880						disp_iovec(mic, copy, __func__,
881							   __LINE__);
882						mpsslog("%s %s %d ",
883							mic->name, __func__, __LINE__);
884						mpsslog("read from net 0x%lx\n",
885							sum_iovec_len(copy));
886	#endif
887						len = writev(pty_fd,
888							copy.iov, copy.iovcnt);
889						if (len != sum_iovec_len(&copy)) {
890							mpsslog("Tun write failed %s ",
891								strerror(errno));
892							mpsslog("len 0x%zx ", len);
893							mpsslog("read_len 0x%zx\n",
894								sum_iovec_len(&copy));
895						} else {
896	#ifdef DEBUG
897							disp_iovec(mic, copy, __func__,
898								   __LINE__);
899							mpsslog("%s %s %d ",
900								mic->name, __func__,
901								__LINE__);
902							mpsslog("wrote to tap 0x%lx\n",
903								len);
904	#endif
905						}
906					} else {
907						mpsslog("%s %s %d mic_virtio_copy %s\n",
908							mic->name, __func__, __LINE__,
909							strerror(errno));
910						break;
911					}
912				}
913			}
914			if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
915				mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
916		}
917	_close_pty:
918		close(pty_fd);
919	_return:
920		pthread_exit(NULL);
921	}
922	
923	static void
924	add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
925	{
926		char path[PATH_MAX];
927		int fd, err;
928	
929		snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
930		fd = open(path, O_RDWR);
931		if (fd < 0) {
932			mpsslog("Could not open %s %s\n", path, strerror(errno));
933			return;
934		}
935	
936		err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
937		if (err < 0) {
938			mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
939			close(fd);
940			return;
941		}
942		switch (dd->type) {
943		case VIRTIO_ID_NET:
944			mic->mic_net.virtio_net_fd = fd;
945			mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
946			break;
947		case VIRTIO_ID_CONSOLE:
948			mic->mic_console.virtio_console_fd = fd;
949			mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
950			break;
951		case VIRTIO_ID_BLOCK:
952			mic->mic_virtblk.virtio_block_fd = fd;
953			mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
954			break;
955		}
956	}
957	
958	static bool
959	set_backend_file(struct mic_info *mic)
960	{
961		FILE *config;
962		char buff[PATH_MAX], *line, *evv, *p;
963	
964		snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
965		config = fopen(buff, "r");
966		if (config == NULL)
967			return false;
968		do {  /* look for "virtblk_backend=XXXX" */
969			line = fgets(buff, PATH_MAX, config);
970			if (line == NULL)
971				break;
972			if (*line == '#')
973				continue;
974			p = strchr(line, '\n');
975			if (p)
976				*p = '\0';
977		} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
978		fclose(config);
979		if (line == NULL)
980			return false;
981		evv = strchr(line, '=');
982		if (evv == NULL)
983			return false;
984		mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
985		if (mic->mic_virtblk.backend_file == NULL) {
986			mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
987			return false;
988		}
989		strcpy(mic->mic_virtblk.backend_file, evv + 1);
990		return true;
991	}
992	
993	#define SECTOR_SIZE 512
994	static bool
995	set_backend_size(struct mic_info *mic)
996	{
997		mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
998			SEEK_END);
999		if (mic->mic_virtblk.backend_size < 0) {
1000			mpsslog("%s: can't seek: %s\n",
1001				mic->name, mic->mic_virtblk.backend_file);
1002			return false;
1003		}
1004		virtblk_dev_page.blk_config.capacity =
1005			mic->mic_virtblk.backend_size / SECTOR_SIZE;
1006		if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
1007			virtblk_dev_page.blk_config.capacity++;
1008	
1009		virtblk_dev_page.blk_config.capacity =
1010			htole64(virtblk_dev_page.blk_config.capacity);
1011	
1012		return true;
1013	}
1014	
1015	static bool
1016	open_backend(struct mic_info *mic)
1017	{
1018		if (!set_backend_file(mic))
1019			goto _error_exit;
1020		mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1021		if (mic->mic_virtblk.backend < 0) {
1022			mpsslog("%s: can't open: %s\n", mic->name,
1023				mic->mic_virtblk.backend_file);
1024			goto _error_free;
1025		}
1026		if (!set_backend_size(mic))
1027			goto _error_close;
1028		mic->mic_virtblk.backend_addr = mmap(NULL,
1029			mic->mic_virtblk.backend_size,
1030			PROT_READ|PROT_WRITE, MAP_SHARED,
1031			mic->mic_virtblk.backend, 0L);
1032		if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1033			mpsslog("%s: can't map: %s %s\n",
1034				mic->name, mic->mic_virtblk.backend_file,
1035				strerror(errno));
1036			goto _error_close;
1037		}
1038		return true;
1039	
1040	 _error_close:
1041		close(mic->mic_virtblk.backend);
1042	 _error_free:
1043		free(mic->mic_virtblk.backend_file);
1044	 _error_exit:
1045		return false;
1046	}
1047	
1048	static void
1049	close_backend(struct mic_info *mic)
1050	{
1051		munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1052		close(mic->mic_virtblk.backend);
1053		free(mic->mic_virtblk.backend_file);
1054	}
1055	
1056	static bool
1057	start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1058	{
1059		if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1060			mpsslog("%s: blk_config is not 8 byte aligned.\n",
1061				mic->name);
1062			return false;
1063		}
1064		add_virtio_device(mic, &virtblk_dev_page.dd);
1065		if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1066					  VIRTIO_ID_BLOCK, vring, NULL,
1067					  virtblk_dev_page.dd.num_vq)) {
1068			mpsslog("%s init_vr failed %s\n",
1069				mic->name, strerror(errno));
1070			return false;
1071		}
1072		return true;
1073	}
1074	
1075	static void
1076	stop_virtblk(struct mic_info *mic)
1077	{
1078		int vr_size, ret;
1079	
1080		vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1081						 MIC_VIRTIO_RING_ALIGN) +
1082				     sizeof(struct _mic_vring_info));
1083		ret = munmap(mic->mic_virtblk.block_dp,
1084			MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1085		if (ret < 0)
1086			mpsslog("%s munmap errno %d\n", mic->name, errno);
1087		close(mic->mic_virtblk.virtio_block_fd);
1088	}
1089	
1090	static __u8
1091	header_error_check(struct vring_desc *desc)
1092	{
1093		if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1094			mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1095				__func__, __LINE__);
1096			return -EIO;
1097		}
1098		if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1099			mpsslog("%s() %d: alone\n",
1100				__func__, __LINE__);
1101			return -EIO;
1102		}
1103		if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1104			mpsslog("%s() %d: not read\n",
1105				__func__, __LINE__);
1106			return -EIO;
1107		}
1108		return 0;
1109	}
1110	
1111	static int
1112	read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1113	{
1114		struct iovec iovec;
1115		struct mic_copy_desc copy;
1116	
1117		iovec.iov_len = sizeof(*hdr);
1118		iovec.iov_base = hdr;
1119		copy.iov = &iovec;
1120		copy.iovcnt = 1;
1121		copy.vr_idx = 0;  /* only one vring on virtio_block */
1122		copy.update_used = false;  /* do not update used index */
1123		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1124	}
1125	
1126	static int
1127	transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1128	{
1129		struct mic_copy_desc copy;
1130	
1131		copy.iov = iovec;
1132		copy.iovcnt = iovcnt;
1133		copy.vr_idx = 0;  /* only one vring on virtio_block */
1134		copy.update_used = false;  /* do not update used index */
1135		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1136	}
1137	
1138	static __u8
1139	status_error_check(struct vring_desc *desc)
1140	{
1141		if (le32toh(desc->len) != sizeof(__u8)) {
1142			mpsslog("%s() %d: length is not sizeof(status)\n",
1143				__func__, __LINE__);
1144			return -EIO;
1145		}
1146		return 0;
1147	}
1148	
1149	static int
1150	write_status(int fd, __u8 *status)
1151	{
1152		struct iovec iovec;
1153		struct mic_copy_desc copy;
1154	
1155		iovec.iov_base = status;
1156		iovec.iov_len = sizeof(*status);
1157		copy.iov = &iovec;
1158		copy.iovcnt = 1;
1159		copy.vr_idx = 0;  /* only one vring on virtio_block */
1160		copy.update_used = true; /* Update used index */
1161		return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1162	}
1163	
1164	#ifndef VIRTIO_BLK_T_GET_ID
1165	#define VIRTIO_BLK_T_GET_ID    8
1166	#endif
1167	
1168	static void *
1169	virtio_block(void *arg)
1170	{
1171		struct mic_info *mic = (struct mic_info *)arg;
1172		int ret;
1173		struct pollfd block_poll;
1174		struct mic_vring vring;
1175		__u16 avail_idx;
1176		__u32 desc_idx;
1177		struct vring_desc *desc;
1178		struct iovec *iovec, *piov;
1179		__u8 status;
1180		__u32 buffer_desc_idx;
1181		struct virtio_blk_outhdr hdr;
1182		void *fos;
1183	
1184		for (;;) {  /* forever */
1185			if (!open_backend(mic)) { /* No virtblk */
1186				for (mic->mic_virtblk.signaled = 0;
1187					!mic->mic_virtblk.signaled;)
1188					sleep(1);
1189				continue;
1190			}
1191	
1192			/* backend file is specified. */
1193			if (!start_virtblk(mic, &vring))
1194				goto _close_backend;
1195			iovec = malloc(sizeof(*iovec) *
1196				le32toh(virtblk_dev_page.blk_config.seg_max));
1197			if (!iovec) {
1198				mpsslog("%s: can't alloc iovec: %s\n",
1199					mic->name, strerror(ENOMEM));
1200				goto _stop_virtblk;
1201			}
1202	
1203			block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1204			block_poll.events = POLLIN;
1205			for (mic->mic_virtblk.signaled = 0;
1206			     !mic->mic_virtblk.signaled;) {
1207				block_poll.revents = 0;
1208						/* timeout in 1 sec to see signaled */
1209				ret = poll(&block_poll, 1, 1000);
1210				if (ret < 0) {
1211					mpsslog("%s %d: poll failed: %s\n",
1212						__func__, __LINE__,
1213						strerror(errno));
1214					continue;
1215				}
1216	
1217				if (!(block_poll.revents & POLLIN)) {
1218	#ifdef DEBUG
1219					mpsslog("%s %d: block_poll.revents=0x%x\n",
1220						__func__, __LINE__, block_poll.revents);
1221	#endif
1222					continue;
1223				}
1224	
1225				/* POLLIN */
1226				while (vring.info->avail_idx !=
1227					le16toh(vring.vr.avail->idx)) {
1228					/* read header element */
1229					avail_idx =
1230						vring.info->avail_idx &
1231						(vring.vr.num - 1);
1232					desc_idx = le16toh(
1233						vring.vr.avail->ring[avail_idx]);
1234					desc = &vring.vr.desc[desc_idx];
1235	#ifdef DEBUG
1236					mpsslog("%s() %d: avail_idx=%d ",
1237						__func__, __LINE__,
1238						vring.info->avail_idx);
1239					mpsslog("vring.vr.num=%d desc=%p\n",
1240						vring.vr.num, desc);
1241	#endif
1242					status = header_error_check(desc);
1243					ret = read_header(
1244						mic->mic_virtblk.virtio_block_fd,
1245						&hdr, desc_idx);
1246					if (ret < 0) {
1247						mpsslog("%s() %d %s: ret=%d %s\n",
1248							__func__, __LINE__,
1249							mic->name, ret,
1250							strerror(errno));
1251						break;
1252					}
1253					/* buffer element */
1254					piov = iovec;
1255					status = 0;
1256					fos = mic->mic_virtblk.backend_addr +
1257						(hdr.sector * SECTOR_SIZE);
1258					buffer_desc_idx = next_desc(desc);
1259					desc_idx = buffer_desc_idx;
1260					for (desc = &vring.vr.desc[buffer_desc_idx];
1261					     desc->flags & VRING_DESC_F_NEXT;
1262					     desc_idx = next_desc(desc),
1263						     desc = &vring.vr.desc[desc_idx]) {
1264						piov->iov_len = desc->len;
1265						piov->iov_base = fos;
1266						piov++;
1267						fos += desc->len;
1268					}
1269					/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1270					if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1271						VIRTIO_BLK_T_GET_ID)) {
1272						/*
1273						  VIRTIO_BLK_T_IN - does not do
1274						  anything. Probably for documenting.
1275						  VIRTIO_BLK_T_SCSI_CMD - for
1276						  virtio_scsi.
1277						  VIRTIO_BLK_T_FLUSH - turned off in
1278						  config space.
1279						  VIRTIO_BLK_T_BARRIER - defined but not
1280						  used in anywhere.
1281						*/
1282						mpsslog("%s() %d: type %x ",
1283							__func__, __LINE__,
1284							hdr.type);
1285						mpsslog("is not supported\n");
1286						status = -ENOTSUP;
1287	
1288					} else {
1289						ret = transfer_blocks(
1290						mic->mic_virtblk.virtio_block_fd,
1291							iovec,
1292							piov - iovec);
1293						if (ret < 0 &&
1294						    status != 0)
1295							status = ret;
1296					}
1297					/* write status and update used pointer */
1298					if (status != 0)
1299						status = status_error_check(desc);
1300					ret = write_status(
1301						mic->mic_virtblk.virtio_block_fd,
1302						&status);
1303	#ifdef DEBUG
1304					mpsslog("%s() %d: write status=%d on desc=%p\n",
1305						__func__, __LINE__,
1306						status, desc);
1307	#endif
1308				}
1309			}
1310			free(iovec);
1311	_stop_virtblk:
1312			stop_virtblk(mic);
1313	_close_backend:
1314			close_backend(mic);
1315		}  /* forever */
1316	
1317		pthread_exit(NULL);
1318	}
1319	
1320	static void
1321	reset(struct mic_info *mic)
1322	{
1323	#define RESET_TIMEOUT 120
1324		int i = RESET_TIMEOUT;
1325		setsysfs(mic->name, "state", "reset");
1326		while (i) {
1327			char *state;
1328			state = readsysfs(mic->name, "state");
1329			if (!state)
1330				goto retry;
1331			mpsslog("%s: %s %d state %s\n",
1332				mic->name, __func__, __LINE__, state);
1333	
1334			if (!strcmp(state, "ready")) {
1335				free(state);
1336				break;
1337			}
1338			free(state);
1339	retry:
1340			sleep(1);
1341			i--;
1342		}
1343	}
1344	
1345	static int
1346	get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1347	{
1348		if (!strcmp(shutdown_status, "nop"))
1349			return MIC_NOP;
1350		if (!strcmp(shutdown_status, "crashed"))
1351			return MIC_CRASHED;
1352		if (!strcmp(shutdown_status, "halted"))
1353			return MIC_HALTED;
1354		if (!strcmp(shutdown_status, "poweroff"))
1355			return MIC_POWER_OFF;
1356		if (!strcmp(shutdown_status, "restart"))
1357			return MIC_RESTART;
1358		mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1359		/* Invalid state */
1360		assert(0);
1361	};
1362	
1363	static int get_mic_state(struct mic_info *mic)
1364	{
1365		char *state = NULL;
1366		enum mic_states mic_state;
1367	
1368		while (!state) {
1369			state = readsysfs(mic->name, "state");
1370			sleep(1);
1371		}
1372		mpsslog("%s: %s %d state %s\n",
1373			mic->name, __func__, __LINE__, state);
1374	
1375		if (!strcmp(state, "ready")) {
1376			mic_state = MIC_READY;
1377		} else if (!strcmp(state, "booting")) {
1378			mic_state = MIC_BOOTING;
1379		} else if (!strcmp(state, "online")) {
1380			mic_state = MIC_ONLINE;
1381		} else if (!strcmp(state, "shutting_down")) {
1382			mic_state = MIC_SHUTTING_DOWN;
1383		} else if (!strcmp(state, "reset_failed")) {
1384			mic_state = MIC_RESET_FAILED;
1385		} else if (!strcmp(state, "resetting")) {
1386			mic_state = MIC_RESETTING;
1387		} else {
1388			mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1389			assert(0);
1390		}
1391	
1392		free(state);
1393		return mic_state;
1394	};
1395	
1396	static void mic_handle_shutdown(struct mic_info *mic)
1397	{
1398	#define SHUTDOWN_TIMEOUT 60
1399		int i = SHUTDOWN_TIMEOUT;
1400		char *shutdown_status;
1401		while (i) {
1402			shutdown_status = readsysfs(mic->name, "shutdown_status");
1403			if (!shutdown_status) {
1404				sleep(1);
1405				continue;
1406			}
1407			mpsslog("%s: %s %d shutdown_status %s\n",
1408				mic->name, __func__, __LINE__, shutdown_status);
1409			switch (get_mic_shutdown_status(mic, shutdown_status)) {
1410			case MIC_RESTART:
1411				mic->restart = 1;
1412			case MIC_HALTED:
1413			case MIC_POWER_OFF:
1414			case MIC_CRASHED:
1415				free(shutdown_status);
1416				goto reset;
1417			default:
1418				break;
1419			}
1420			free(shutdown_status);
1421			sleep(1);
1422			i--;
1423		}
1424	reset:
1425		if (!i)
1426			mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1427				mic->name, __func__, __LINE__, shutdown_status);
1428		reset(mic);
1429	}
1430	
1431	static int open_state_fd(struct mic_info *mic)
1432	{
1433		char pathname[PATH_MAX];
1434		int fd;
1435	
1436		snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1437			 MICSYSFSDIR, mic->name, "state");
1438	
1439		fd = open(pathname, O_RDONLY);
1440		if (fd < 0)
1441			mpsslog("%s: opening file %s failed %s\n",
1442				mic->name, pathname, strerror(errno));
1443		return fd;
1444	}
1445	
1446	static int block_till_state_change(int fd, struct mic_info *mic)
1447	{
1448		struct pollfd ufds[1];
1449		char value[PAGE_SIZE];
1450		int ret;
1451	
1452		ufds[0].fd = fd;
1453		ufds[0].events = POLLERR | POLLPRI;
1454		ret = poll(ufds, 1, -1);
1455		if (ret < 0) {
1456			mpsslog("%s: %s %d poll failed %s\n",
1457				mic->name, __func__, __LINE__, strerror(errno));
1458			return ret;
1459		}
1460	
1461		ret = lseek(fd, 0, SEEK_SET);
1462		if (ret < 0) {
1463			mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1464				mic->name, __func__, __LINE__, strerror(errno));
1465			return ret;
1466		}
1467	
1468		ret = read(fd, value, sizeof(value));
1469		if (ret < 0) {
1470			mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1471				mic->name, __func__, __LINE__, strerror(errno));
1472			return ret;
1473		}
1474	
1475		return 0;
1476	}
1477	
1478	static void *
1479	mic_config(void *arg)
1480	{
1481		struct mic_info *mic = (struct mic_info *)arg;
1482		int fd, ret, stat = 0;
1483	
1484		fd = open_state_fd(mic);
1485		if (fd < 0) {
1486			mpsslog("%s: %s %d open state fd failed %s\n",
1487				mic->name, __func__, __LINE__, strerror(errno));
1488			goto exit;
1489		}
1490	
1491		do {
1492			ret = block_till_state_change(fd, mic);
1493			if (ret < 0) {
1494				mpsslog("%s: %s %d block_till_state_change error %s\n",
1495					mic->name, __func__, __LINE__, strerror(errno));
1496				goto close_exit;
1497			}
1498	
1499			switch (get_mic_state(mic)) {
1500			case MIC_SHUTTING_DOWN:
1501				mic_handle_shutdown(mic);
1502				break;
1503			case MIC_READY:
1504			case MIC_RESET_FAILED:
1505				ret = kill(mic->pid, SIGTERM);
1506				mpsslog("%s: %s %d kill pid %d ret %d\n",
1507					mic->name, __func__, __LINE__,
1508					mic->pid, ret);
1509				if (!ret) {
1510					ret = waitpid(mic->pid, &stat,
1511						      WIFSIGNALED(stat));
1512					mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1513						mic->name, __func__, __LINE__,
1514						ret, mic->pid);
1515				}
1516				if (mic->boot_on_resume) {
1517					setsysfs(mic->name, "state", "boot");
1518					mic->boot_on_resume = 0;
1519				}
1520				goto close_exit;
1521			default:
1522				break;
1523			}
1524		} while (1);
1525	
1526	close_exit:
1527		close(fd);
1528	exit:
1529		init_mic(mic);
1530		pthread_exit(NULL);
1531	}
1532	
1533	static void
1534	set_cmdline(struct mic_info *mic)
1535	{
1536		char buffer[PATH_MAX];
1537		int len;
1538	
1539		len = snprintf(buffer, PATH_MAX,
1540			"clocksource=tsc highres=off nohz=off ");
1541		len += snprintf(buffer + len, PATH_MAX,
1542			"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1543		len += snprintf(buffer + len, PATH_MAX,
1544			"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1545			mic->id + 1);
1546	
1547		setsysfs(mic->name, "cmdline", buffer);
1548		mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1549		snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1550		mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1551	}
1552	
1553	static void
1554	set_log_buf_info(struct mic_info *mic)
1555	{
1556		int fd;
1557		off_t len;
1558		char system_map[] = "/lib/firmware/mic/System.map";
1559		char *map, *temp, log_buf[17] = {'\0'};
1560	
1561		fd = open(system_map, O_RDONLY);
1562		if (fd < 0) {
1563			mpsslog("%s: Opening System.map failed: %d\n",
1564				mic->name, errno);
1565			return;
1566		}
1567		len = lseek(fd, 0, SEEK_END);
1568		if (len < 0) {
1569			mpsslog("%s: Reading System.map size failed: %d\n",
1570				mic->name, errno);
1571			close(fd);
1572			return;
1573		}
1574		map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1575		if (map == MAP_FAILED) {
1576			mpsslog("%s: mmap of System.map failed: %d\n",
1577				mic->name, errno);
1578			close(fd);
1579			return;
1580		}
1581		temp = strstr(map, "__log_buf");
1582		if (!temp) {
1583			mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1584			munmap(map, len);
1585			close(fd);
1586			return;
1587		}
1588		strncpy(log_buf, temp - 19, 16);
1589		setsysfs(mic->name, "log_buf_addr", log_buf);
1590		mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1591		temp = strstr(map, "log_buf_len");
1592		if (!temp) {
1593			mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1594			munmap(map, len);
1595			close(fd);
1596			return;
1597		}
1598		strncpy(log_buf, temp - 19, 16);
1599		setsysfs(mic->name, "log_buf_len", log_buf);
1600		mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1601		munmap(map, len);
1602		close(fd);
1603	}
1604	
1605	static void
1606	change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1607	{
1608		struct mic_info *mic;
1609	
1610		for (mic = mic_list.next; mic != NULL; mic = mic->next)
1611			mic->mic_virtblk.signaled = 1/* true */;
1612	}
1613	
/*
 * Prepare the boot parameters of @mic: first the log buffer information,
 * then the kernel command line.
 */
static void
set_mic_boot_params(struct mic_info *mic)
{
	/* Log buffer address and length taken from System.map. */
	set_log_buf_info(mic);

	/* Kernel command line, including the static network setup. */
	set_cmdline(mic);
}
1620	
1621	static void *
1622	init_mic(void *arg)
1623	{
1624		struct mic_info *mic = (struct mic_info *)arg;
1625		struct sigaction ignore = {
1626			.sa_flags = 0,
1627			.sa_handler = SIG_IGN
1628		};
1629		struct sigaction act = {
1630			.sa_flags = SA_SIGINFO,
1631			.sa_sigaction = change_virtblk_backend,
1632		};
1633		char buffer[PATH_MAX];
1634		int err, fd;
1635	
1636		/*
1637		 * Currently, one virtio block device is supported for each MIC card
1638		 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1639		 * The signal informs the virtio block backend about a change in the
1640		 * configuration file which specifies the virtio backend file name on
1641		 * the host. Virtio block backend then re-reads the configuration file
1642		 * and switches to the new block device. This signalling mechanism may
1643		 * not be required once multiple virtio block devices are supported by
1644		 * the MIC daemon.
1645		 */
1646		sigaction(SIGUSR1, &ignore, NULL);
1647	retry:
1648		fd = open_state_fd(mic);
1649		if (fd < 0) {
1650			mpsslog("%s: %s %d open state fd failed %s\n",
1651				mic->name, __func__, __LINE__, strerror(errno));
1652			sleep(2);
1653			goto retry;
1654		}
1655	
1656		if (mic->restart) {
1657			snprintf(buffer, PATH_MAX, "boot");
1658			setsysfs(mic->name, "state", buffer);
1659			mpsslog("%s restarting mic %d\n",
1660				mic->name, mic->restart);
1661			mic->restart = 0;
1662		}
1663	
1664		while (1) {
1665			while (block_till_state_change(fd, mic)) {
1666				mpsslog("%s: %s %d block_till_state_change error %s\n",
1667					mic->name, __func__, __LINE__, strerror(errno));
1668				sleep(2);
1669				continue;
1670			}
1671	
1672			if (get_mic_state(mic) == MIC_BOOTING)
1673				break;
1674		}
1675	
1676		mic->pid = fork();
1677		switch (mic->pid) {
1678		case 0:
1679			add_virtio_device(mic, &virtcons_dev_page.dd);
1680			add_virtio_device(mic, &virtnet_dev_page.dd);
1681			err = pthread_create(&mic->mic_console.console_thread, NULL,
1682				virtio_console, mic);
1683			if (err)
1684				mpsslog("%s virtcons pthread_create failed %s\n",
1685					mic->name, strerror(err));
1686			err = pthread_create(&mic->mic_net.net_thread, NULL,
1687				virtio_net, mic);
1688			if (err)
1689				mpsslog("%s virtnet pthread_create failed %s\n",
1690					mic->name, strerror(err));
1691			err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1692				virtio_block, mic);
1693			if (err)
1694				mpsslog("%s virtblk pthread_create failed %s\n",
1695					mic->name, strerror(err));
1696			sigemptyset(&act.sa_mask);
1697			err = sigaction(SIGUSR1, &act, NULL);
1698			if (err)
1699				mpsslog("%s sigaction SIGUSR1 failed %s\n",
1700					mic->name, strerror(errno));
1701			while (1)
1702				sleep(60);
1703		case -1:
1704			mpsslog("fork failed MIC name %s id %d errno %d\n",
1705				mic->name, mic->id, errno);
1706			break;
1707		default:
1708			err = pthread_create(&mic->config_thread, NULL,
1709					     mic_config, mic);
1710			if (err)
1711				mpsslog("%s mic_config pthread_create failed %s\n",
1712					mic->name, strerror(err));
1713		}
1714	
1715		return NULL;
1716	}
1717	
1718	static void
1719	start_daemon(void)
1720	{
1721		struct mic_info *mic;
1722		int err;
1723	
1724		for (mic = mic_list.next; mic; mic = mic->next) {
1725			set_mic_boot_params(mic);
1726			err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1727			if (err)
1728				mpsslog("%s init_mic pthread_create failed %s\n",
1729					mic->name, strerror(err));
1730		}
1731	
1732		while (1)
1733			sleep(60);
1734	}
1735	
1736	static int
1737	init_mic_list(void)
1738	{
1739		struct mic_info *mic = &mic_list;
1740		struct dirent *file;
1741		DIR *dp;
1742		int cnt = 0;
1743	
1744		dp = opendir(MICSYSFSDIR);
1745		if (!dp)
1746			return 0;
1747	
1748		while ((file = readdir(dp)) != NULL) {
1749			if (!strncmp(file->d_name, "mic", 3)) {
1750				mic->next = calloc(1, sizeof(struct mic_info));
1751				if (mic->next) {
1752					mic = mic->next;
1753					mic->id = atoi(&file->d_name[3]);
1754					mic->name = malloc(strlen(file->d_name) + 16);
1755					if (mic->name)
1756						strcpy(mic->name, file->d_name);
1757					mpsslog("MIC name %s id %d\n", mic->name,
1758						mic->id);
1759					cnt++;
1760				}
1761			}
1762		}
1763	
1764		closedir(dp);
1765		return cnt;
1766	}
1767	
1768	void
1769	mpsslog(char *format, ...)
1770	{
1771		va_list args;
1772		char buffer[4096];
1773		char ts[52], *ts1;
1774		time_t t;
1775	
1776		if (logfp == NULL)
1777			return;
1778	
1779		va_start(args, format);
1780		vsprintf(buffer, format, args);
1781		va_end(args);
1782	
1783		time(&t);
1784		ts1 = ctime_r(&t, ts);
1785		ts1[strlen(ts1) - 1] = '\0';
1786		fprintf(logfp, "%s: %s", ts1, buffer);
1787	
1788		fflush(logfp);
1789	}
1790	
1791	int
1792	main(int argc, char *argv[])
1793	{
1794		int cnt;
1795		pid_t pid;
1796	
1797		myname = argv[0];
1798	
1799		logfp = fopen(LOGFILE_NAME, "a+");
1800		if (!logfp) {
1801			fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1802			exit(1);
1803		}
1804		pid = fork();
1805		switch (pid) {
1806		case 0:
1807			break;
1808		case -1:
1809			exit(2);
1810		default:
1811			exit(0);
1812		}
1813	
1814		mpsslog("MIC Daemon start\n");
1815	
1816		cnt = init_mic_list();
1817		if (cnt == 0) {
1818			mpsslog("MIC module not loaded\n");
1819			exit(3);
1820		}
1821		mpsslog("MIC found %d devices\n", cnt);
1822	
1823		start_daemon();
1824	
1825		exit(0);
1826	}
Hide Line Numbers
About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Information is copyright its respective author. All material is available from the Linux Kernel Source distributed under a GPL License. This page is provided as a free service by mjmwired.net.