Based on kernel version 4.8. Page generated on 2016-10-06 23:18 EST.
1 /* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * Copyright(c) 2013 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License, version 2, as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * The full GNU General Public License is included in this distribution in 16 * the file called "COPYING". 17 * 18 * Intel MIC User Space Tools. 19 */ 20 21 #define _GNU_SOURCE 22 23 #include <stdlib.h> 24 #include <fcntl.h> 25 #include <getopt.h> 26 #include <assert.h> 27 #include <unistd.h> 28 #include <stdbool.h> 29 #include <signal.h> 30 #include <poll.h> 31 #include <features.h> 32 #include <sys/types.h> 33 #include <sys/stat.h> 34 #include <sys/mman.h> 35 #include <sys/socket.h> 36 #include <linux/virtio_ring.h> 37 #include <linux/virtio_net.h> 38 #include <linux/virtio_console.h> 39 #include <linux/virtio_blk.h> 40 #include <linux/version.h> 41 #include "mpssd.h" 42 #include <linux/mic_ioctl.h> 43 #include <linux/mic_common.h> 44 #include <tools/endian.h> 45 46 static void *init_mic(void *arg); 47 48 static FILE *logfp; 49 static struct mic_info mic_list; 50 51 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 52 53 #define min_t(type, x, y) ({ \ 54 type __min1 = (x); \ 55 type __min2 = (y); \ 56 __min1 < __min2 ? __min1 : __min2; }) 57 58 /* align addr on a size boundary - adjust address up/down if needed */ 59 #define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) 60 #define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) 61 62 /* align addr on a size boundary - adjust address up if needed */ 63 #define _ALIGN(addr, size) _ALIGN_UP(addr, size) 64 65 /* to align the pointer to the (next) page boundary */ 66 #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) 67 68 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) 69 70 #define GSO_ENABLED 1 71 #define MAX_GSO_SIZE (64 * 1024) 72 #define ETH_H_LEN 14 73 #define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) 74 #define MIC_DEVICE_PAGE_END 0x1000 75 76 #ifndef VIRTIO_NET_HDR_F_DATA_VALID 77 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ 78 #endif 79 80 static struct { 81 struct mic_device_desc dd; 82 struct mic_vqconfig vqconfig[2]; 83 __u32 host_features, guest_acknowledgements; 84 struct virtio_console_config cons_config; 85 } virtcons_dev_page = { 86 .dd = { 87 .type = VIRTIO_ID_CONSOLE, 88 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), 89 .feature_len = sizeof(virtcons_dev_page.host_features), 90 .config_len = sizeof(virtcons_dev_page.cons_config), 91 }, 92 .vqconfig[0] = { 93 .num = htole16(MIC_VRING_ENTRIES), 94 }, 95 .vqconfig[1] = { 96 .num = htole16(MIC_VRING_ENTRIES), 97 }, 98 }; 99 100 static struct { 101 struct mic_device_desc dd; 102 struct mic_vqconfig vqconfig[2]; 103 __u32 host_features, guest_acknowledgements; 104 struct virtio_net_config net_config; 105 } virtnet_dev_page = { 106 .dd = { 107 .type = VIRTIO_ID_NET, 108 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), 109 .feature_len = sizeof(virtnet_dev_page.host_features), 110 .config_len = sizeof(virtnet_dev_page.net_config), 111 }, 112 .vqconfig[0] = { 113 .num = htole16(MIC_VRING_ENTRIES), 114 }, 115 .vqconfig[1] = { 116 .num = htole16(MIC_VRING_ENTRIES), 117 }, 118 #if GSO_ENABLED 119 .host_features = htole32( 120 1 << VIRTIO_NET_F_CSUM | 121 1 << VIRTIO_NET_F_GSO | 122 1 << VIRTIO_NET_F_GUEST_TSO4 | 123 1 << VIRTIO_NET_F_GUEST_TSO6 | 124 1 << VIRTIO_NET_F_GUEST_ECN), 125 #else 126 .host_features = 0, 127 #endif 128 }; 129 130 static const char *mic_config_dir = "/etc/mpss"; 131 static const char *virtblk_backend = "VIRTBLK_BACKEND"; 132 static struct { 133 struct mic_device_desc dd; 134 struct mic_vqconfig vqconfig[1]; 135 __u32 host_features, guest_acknowledgements; 136 struct virtio_blk_config blk_config; 137 } virtblk_dev_page = { 138 .dd = { 139 .type = VIRTIO_ID_BLOCK, 140 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), 141 .feature_len = sizeof(virtblk_dev_page.host_features), 142 .config_len = sizeof(virtblk_dev_page.blk_config), 143 }, 144 .vqconfig[0] = { 145 .num = htole16(MIC_VRING_ENTRIES), 146 }, 147 .host_features = 148 htole32(1<<VIRTIO_BLK_F_SEG_MAX), 149 .blk_config = { 150 .seg_max = htole32(MIC_VRING_ENTRIES - 2), 151 .capacity = htole64(0), 152 } 153 }; 154 155 static char *myname; 156 157 static int 158 tap_configure(struct mic_info *mic, char *dev) 159 { 160 pid_t pid; 161 char *ifargv[7]; 162 char ipaddr[IFNAMSIZ]; 163 int ret = 0; 164 165 pid = fork(); 166 if (pid == 0) { 167 ifargv[0] = "ip"; 168 ifargv[1] = "link"; 169 ifargv[2] = "set"; 170 ifargv[3] = dev; 171 ifargv[4] = "up"; 172 ifargv[5] = NULL; 173 mpsslog("Configuring %s\n", dev); 174 ret = execvp("ip", ifargv); 175 if (ret < 0) { 176 mpsslog("%s execvp failed errno %s\n", 177 mic->name, strerror(errno)); 178 return ret; 179 } 180 } 181 if (pid < 0) { 182 mpsslog("%s fork failed errno %s\n", 183 mic->name, strerror(errno)); 184 return ret; 185 } 186 187 ret = waitpid(pid, NULL, 0); 188 if (ret < 0) { 189 mpsslog("%s waitpid failed errno %s\n", 190 mic->name, strerror(errno)); 191 return ret; 192 } 193 194 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1); 195 196 pid = fork(); 197 if (pid == 0) { 198 ifargv[0] = "ip"; 199 ifargv[1] = "addr"; 200 ifargv[2] = "add"; 201 ifargv[3] = ipaddr; 202 ifargv[4] = "dev"; 203 ifargv[5] = dev; 204 ifargv[6] = NULL; 205 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); 206 ret = execvp("ip", ifargv); 207 if (ret < 0) { 208 mpsslog("%s execvp failed errno %s\n", 209 mic->name, strerror(errno)); 210 return ret; 211 } 212 } 213 if (pid < 0) { 214 mpsslog("%s fork failed errno %s\n", 215 mic->name, strerror(errno)); 216 return ret; 217 } 218 219 ret = waitpid(pid, NULL, 0); 220 if (ret < 0) { 221 mpsslog("%s waitpid failed errno %s\n", 222 mic->name, strerror(errno)); 223 return ret; 224 } 225 mpsslog("MIC name %s %s %d DONE!\n", 226 mic->name, __func__, __LINE__); 227 return 0; 228 } 229 230 static int tun_alloc(struct mic_info *mic, char *dev) 231 { 232 struct ifreq ifr; 233 int fd, err; 234 #if GSO_ENABLED 235 unsigned offload; 236 #endif 237 fd = open("/dev/net/tun", O_RDWR); 238 if (fd < 0) { 239 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); 240 goto done; 241 } 242 243 memset(&ifr, 0, sizeof(ifr)); 244 245 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 246 if (*dev) 247 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 248 249 err = ioctl(fd, TUNSETIFF, (void *)&ifr); 250 if (err < 0) { 251 mpsslog("%s %s %d TUNSETIFF failed %s\n", 252 mic->name, __func__, __LINE__, strerror(errno)); 253 close(fd); 254 return err; 255 } 256 #if GSO_ENABLED 257 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN; 258 259 err = ioctl(fd, TUNSETOFFLOAD, offload); 260 if (err < 0) { 261 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", 262 mic->name, __func__, __LINE__, strerror(errno)); 263 close(fd); 264 return err; 265 } 266 #endif 267 strcpy(dev, ifr.ifr_name); 268 mpsslog("Created TAP %s\n", dev); 269 done: 270 return fd; 271 } 272 273 #define NET_FD_VIRTIO_NET 0 274 #define NET_FD_TUN 1 275 #define MAX_NET_FD 2 276 277 static void set_dp(struct mic_info *mic, int type, void *dp) 278 { 279 switch (type) { 280 case VIRTIO_ID_CONSOLE: 281 mic->mic_console.console_dp = dp; 282 return; 283 case VIRTIO_ID_NET: 284 mic->mic_net.net_dp = dp; 285 return; 286 case VIRTIO_ID_BLOCK: 287 mic->mic_virtblk.block_dp = dp; 288 return; 289 } 290 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 291 assert(0); 292 } 293 294 static void *get_dp(struct mic_info *mic, int type) 295 { 296 switch (type) { 297 case VIRTIO_ID_CONSOLE: 298 return mic->mic_console.console_dp; 299 case VIRTIO_ID_NET: 300 return mic->mic_net.net_dp; 301 case VIRTIO_ID_BLOCK: 302 return mic->mic_virtblk.block_dp; 303 } 304 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 305 assert(0); 306 return NULL; 307 } 308 309 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) 310 { 311 struct mic_device_desc *d; 312 int i; 313 void *dp = get_dp(mic, type); 314 315 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; 316 i += mic_total_desc_size(d)) { 317 d = dp + i; 318 319 /* End of list */ 320 if (d->type == 0) 321 break; 322 323 if (d->type == -1) 324 continue; 325 326 mpsslog("%s %s d-> type %d d %p\n", 327 mic->name, __func__, d->type, d); 328 329 if (d->type == (__u8)type) 330 return d; 331 } 332 mpsslog("%s %s %d not found\n", mic->name, __func__, type); 333 return NULL; 334 } 335 336 /* See comments in vhost.c for explanation of next_desc() */ 337 static unsigned next_desc(struct vring_desc *desc) 338 { 339 unsigned int next; 340 341 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) 342 return -1U; 343 next = le16toh(desc->next); 344 return next; 345 } 346 347 /* Sum up all the IOVEC length */ 348 static ssize_t 349 sum_iovec_len(struct mic_copy_desc *copy) 350 { 351 ssize_t sum = 0; 352 unsigned int i; 353 354 for (i = 0; i < copy->iovcnt; i++) 355 sum += copy->iov[i].iov_len; 356 return sum; 357 } 358 359 static inline void verify_out_len(struct mic_info *mic, 360 struct mic_copy_desc *copy) 361 { 362 if (copy->out_len != sum_iovec_len(copy)) { 363 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", 364 mic->name, __func__, __LINE__, 365 copy->out_len, sum_iovec_len(copy)); 366 assert(copy->out_len == sum_iovec_len(copy)); 367 } 368 } 369 370 /* Display an iovec */ 371 static void 372 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, 373 const char *s, int line) 374 { 375 unsigned int i; 376 377 for (i = 0; i < copy->iovcnt; i++) 378 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", 379 mic->name, s, line, i, 380 copy->iov[i].iov_base, copy->iov[i].iov_len); 381 } 382 383 static inline __u16 read_avail_idx(struct mic_vring *vr) 384 { 385 return ACCESS_ONCE(vr->info->avail_idx); 386 } 387 388 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, 389 struct mic_copy_desc *copy, ssize_t len) 390 { 391 copy->vr_idx = tx ? 0 : 1; 392 copy->update_used = true; 393 if (type == VIRTIO_ID_NET) 394 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); 395 else 396 copy->iov[0].iov_len = len; 397 } 398 399 /* Central API which triggers the copies */ 400 static int 401 mic_virtio_copy(struct mic_info *mic, int fd, 402 struct mic_vring *vr, struct mic_copy_desc *copy) 403 { 404 int ret; 405 406 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); 407 if (ret) { 408 mpsslog("%s %s %d errno %s ret %d\n", 409 mic->name, __func__, __LINE__, 410 strerror(errno), ret); 411 } 412 return ret; 413 } 414 415 static inline unsigned _vring_size(unsigned int num, unsigned long align) 416 { 417 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num) 418 + align - 1) & ~(align - 1)) 419 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; 420 } 421 422 /* 423 * This initialization routine requires at least one 424 * vring i.e. vr0. vr1 is optional. 425 */ 426 static void * 427 init_vr(struct mic_info *mic, int fd, int type, 428 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) 429 { 430 int vr_size; 431 char *va; 432 433 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, 434 MIC_VIRTIO_RING_ALIGN) + 435 sizeof(struct _mic_vring_info)); 436 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, 437 PROT_READ, MAP_SHARED, fd, 0); 438 if (MAP_FAILED == va) { 439 mpsslog("%s %s %d mmap failed errno %s\n", 440 mic->name, __func__, __LINE__, 441 strerror(errno)); 442 goto done; 443 } 444 set_dp(mic, type, va); 445 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; 446 vr0->info = vr0->va + 447 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); 448 vring_init(&vr0->vr, 449 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); 450 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", 451 __func__, mic->name, vr0->va, vr0->info, vr_size, 452 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); 453 mpsslog("magic 0x%x expected 0x%x\n", 454 le32toh(vr0->info->magic), MIC_MAGIC + type); 455 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); 456 if (vr1) { 457 vr1->va = (struct mic_vring *) 458 &va[MIC_DEVICE_PAGE_END + vr_size]; 459 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES, 460 MIC_VIRTIO_RING_ALIGN); 461 vring_init(&vr1->vr, 462 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); 463 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", 464 __func__, mic->name, vr1->va, vr1->info, vr_size, 465 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); 466 mpsslog("magic 0x%x expected 0x%x\n", 467 le32toh(vr1->info->magic), MIC_MAGIC + type + 1); 468 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); 469 } 470 done: 471 return va; 472 } 473 474 static int 475 wait_for_card_driver(struct mic_info *mic, int fd, int type) 476 { 477 struct pollfd pollfd; 478 int err; 479 struct mic_device_desc *desc = get_device_desc(mic, type); 480 __u8 prev_status; 481 482 if (!desc) 483 return -ENODEV; 484 prev_status = desc->status; 485 pollfd.fd = fd; 486 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n", 487 mic->name, __func__, type, desc->status); 488 489 while (1) { 490 pollfd.events = POLLIN; 491 pollfd.revents = 0; 492 err = poll(&pollfd, 1, -1); 493 if (err < 0) { 494 mpsslog("%s %s poll failed %s\n", 495 mic->name, __func__, strerror(errno)); 496 continue; 497 } 498 499 if (pollfd.revents) { 500 if (desc->status != prev_status) { 501 mpsslog("%s %s Waiting... desc-> type %d " 502 "status 0x%x\n", 503 mic->name, __func__, type, 504 desc->status); 505 prev_status = desc->status; 506 } 507 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { 508 mpsslog("%s %s poll.revents %d\n", 509 mic->name, __func__, pollfd.revents); 510 mpsslog("%s %s desc-> type %d status 0x%x\n", 511 mic->name, __func__, type, 512 desc->status); 513 break; 514 } 515 } 516 } 517 return 0; 518 } 519 520 /* Spin till we have some descriptors */ 521 static void 522 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) 523 { 524 __u16 avail_idx = read_avail_idx(vr); 525 526 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { 527 #ifdef DEBUG 528 mpsslog("%s %s waiting for desc avail %d info_avail %d\n", 529 mic->name, __func__, 530 le16toh(vr->vr.avail->idx), vr->info->avail_idx); 531 #endif 532 sched_yield(); 533 } 534 } 535 536 static void * 537 virtio_net(void *arg) 538 { 539 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; 540 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); 541 struct iovec vnet_iov[2][2] = { 542 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, 543 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, 544 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, 545 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, 546 }; 547 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; 548 struct mic_info *mic = (struct mic_info *)arg; 549 char if_name[IFNAMSIZ]; 550 struct pollfd net_poll[MAX_NET_FD]; 551 struct mic_vring tx_vr, rx_vr; 552 struct mic_copy_desc copy; 553 struct mic_device_desc *desc; 554 int err; 555 556 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); 557 mic->mic_net.tap_fd = tun_alloc(mic, if_name); 558 if (mic->mic_net.tap_fd < 0) 559 goto done; 560 561 if (tap_configure(mic, if_name)) 562 goto done; 563 mpsslog("MIC name %s id %d\n", mic->name, mic->id); 564 565 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; 566 net_poll[NET_FD_VIRTIO_NET].events = POLLIN; 567 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; 568 net_poll[NET_FD_TUN].events = POLLIN; 569 570 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, 571 VIRTIO_ID_NET, &tx_vr, &rx_vr, 572 virtnet_dev_page.dd.num_vq)) { 573 mpsslog("%s init_vr failed %s\n", 574 mic->name, strerror(errno)); 575 goto done; 576 } 577 578 copy.iovcnt = 2; 579 desc = get_device_desc(mic, VIRTIO_ID_NET); 580 581 while (1) { 582 ssize_t len; 583 584 net_poll[NET_FD_VIRTIO_NET].revents = 0; 585 net_poll[NET_FD_TUN].revents = 0; 586 587 /* Start polling for data from tap and virtio net */ 588 err = poll(net_poll, 2, -1); 589 if (err < 0) { 590 mpsslog("%s poll failed %s\n", 591 __func__, strerror(errno)); 592 continue; 593 } 594 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 595 err = wait_for_card_driver(mic, 596 mic->mic_net.virtio_net_fd, 597 VIRTIO_ID_NET); 598 if (err) { 599 mpsslog("%s %s %d Exiting...\n", 600 mic->name, __func__, __LINE__); 601 break; 602 } 603 } 604 /* 605 * Check if there is data to be read from TUN and write to 606 * virtio net fd if there is. 607 */ 608 if (net_poll[NET_FD_TUN].revents & POLLIN) { 609 copy.iov = iov0; 610 len = readv(net_poll[NET_FD_TUN].fd, 611 copy.iov, copy.iovcnt); 612 if (len > 0) { 613 struct virtio_net_hdr *hdr 614 = (struct virtio_net_hdr *)vnet_hdr[0]; 615 616 /* Disable checksums on the card since we are on 617 a reliable PCIe link */ 618 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; 619 #ifdef DEBUG 620 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, 621 __func__, __LINE__, hdr->flags); 622 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", 623 copy.out_len, hdr->gso_type); 624 #endif 625 #ifdef DEBUG 626 disp_iovec(mic, copy, __func__, __LINE__); 627 mpsslog("%s %s %d read from tap 0x%lx\n", 628 mic->name, __func__, __LINE__, 629 len); 630 #endif 631 spin_for_descriptors(mic, &tx_vr); 632 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©, 633 len); 634 635 err = mic_virtio_copy(mic, 636 mic->mic_net.virtio_net_fd, &tx_vr, 637 ©); 638 if (err < 0) { 639 mpsslog("%s %s %d mic_virtio_copy %s\n", 640 mic->name, __func__, __LINE__, 641 strerror(errno)); 642 } 643 if (!err) 644 verify_out_len(mic, ©); 645 #ifdef DEBUG 646 disp_iovec(mic, copy, __func__, __LINE__); 647 mpsslog("%s %s %d wrote to net 0x%lx\n", 648 mic->name, __func__, __LINE__, 649 sum_iovec_len(©)); 650 #endif 651 /* Reinitialize IOV for next run */ 652 iov0[1].iov_len = MAX_NET_PKT_SIZE; 653 } else if (len < 0) { 654 disp_iovec(mic, ©, __func__, __LINE__); 655 mpsslog("%s %s %d read failed %s ", mic->name, 656 __func__, __LINE__, strerror(errno)); 657 mpsslog("cnt %d sum %zd\n", 658 copy.iovcnt, sum_iovec_len(©)); 659 } 660 } 661 662 /* 663 * Check if there is data to be read from virtio net and 664 * write to TUN if there is. 665 */ 666 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { 667 while (rx_vr.info->avail_idx != 668 le16toh(rx_vr.vr.avail->idx)) { 669 copy.iov = iov1; 670 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©, 671 MAX_NET_PKT_SIZE 672 + sizeof(struct virtio_net_hdr)); 673 674 err = mic_virtio_copy(mic, 675 mic->mic_net.virtio_net_fd, &rx_vr, 676 ©); 677 if (!err) { 678 #ifdef DEBUG 679 struct virtio_net_hdr *hdr 680 = (struct virtio_net_hdr *) 681 vnet_hdr[1]; 682 683 mpsslog("%s %s %d hdr->flags 0x%x, ", 684 mic->name, __func__, __LINE__, 685 hdr->flags); 686 mpsslog("out_len %d gso_type 0x%x\n", 687 copy.out_len, 688 hdr->gso_type); 689 #endif 690 /* Set the correct output iov_len */ 691 iov1[1].iov_len = copy.out_len - 692 sizeof(struct virtio_net_hdr); 693 verify_out_len(mic, ©); 694 #ifdef DEBUG 695 disp_iovec(mic, copy, __func__, 696 __LINE__); 697 mpsslog("%s %s %d ", 698 mic->name, __func__, __LINE__); 699 mpsslog("read from net 0x%lx\n", 700 sum_iovec_len(copy)); 701 #endif 702 len = writev(net_poll[NET_FD_TUN].fd, 703 copy.iov, copy.iovcnt); 704 if (len != sum_iovec_len(©)) { 705 mpsslog("Tun write failed %s ", 706 strerror(errno)); 707 mpsslog("len 0x%zx ", len); 708 mpsslog("read_len 0x%zx\n", 709 sum_iovec_len(©)); 710 } else { 711 #ifdef DEBUG 712 disp_iovec(mic, ©, __func__, 713 __LINE__); 714 mpsslog("%s %s %d ", 715 mic->name, __func__, 716 __LINE__); 717 mpsslog("wrote to tap 0x%lx\n", 718 len); 719 #endif 720 } 721 } else { 722 mpsslog("%s %s %d mic_virtio_copy %s\n", 723 mic->name, __func__, __LINE__, 724 strerror(errno)); 725 break; 726 } 727 } 728 } 729 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) 730 mpsslog("%s: %s: POLLERR\n", __func__, mic->name); 731 } 732 done: 733 pthread_exit(NULL); 734 } 735 736 /* virtio_console */ 737 #define VIRTIO_CONSOLE_FD 0 738 #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) 739 #define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ 740 #define MAX_BUFFER_SIZE PAGE_SIZE 741 742 static void * 743 virtio_console(void *arg) 744 { 745 static __u8 vcons_buf[2][PAGE_SIZE]; 746 struct iovec vcons_iov[2] = { 747 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, 748 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, 749 }; 750 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; 751 struct mic_info *mic = (struct mic_info *)arg; 752 int err; 753 struct pollfd console_poll[MAX_CONSOLE_FD]; 754 int pty_fd; 755 char *pts_name; 756 ssize_t len; 757 struct mic_vring tx_vr, rx_vr; 758 struct mic_copy_desc copy; 759 struct mic_device_desc *desc; 760 761 pty_fd = posix_openpt(O_RDWR); 762 if (pty_fd < 0) { 763 mpsslog("can't open a pseudoterminal master device: %s\n", 764 strerror(errno)); 765 goto _return; 766 } 767 pts_name = ptsname(pty_fd); 768 if (pts_name == NULL) { 769 mpsslog("can't get pts name\n"); 770 goto _close_pty; 771 } 772 printf("%s console message goes to %s\n", mic->name, pts_name); 773 mpsslog("%s console message goes to %s\n", mic->name, pts_name); 774 err = grantpt(pty_fd); 775 if (err < 0) { 776 mpsslog("can't grant access: %s %s\n", 777 pts_name, strerror(errno)); 778 goto _close_pty; 779 } 780 err = unlockpt(pty_fd); 781 if (err < 0) { 782 mpsslog("can't unlock a pseudoterminal: %s %s\n", 783 pts_name, strerror(errno)); 784 goto _close_pty; 785 } 786 console_poll[MONITOR_FD].fd = pty_fd; 787 console_poll[MONITOR_FD].events = POLLIN; 788 789 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; 790 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; 791 792 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, 793 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, 794 virtcons_dev_page.dd.num_vq)) { 795 mpsslog("%s init_vr failed %s\n", 796 mic->name, strerror(errno)); 797 goto _close_pty; 798 } 799 800 copy.iovcnt = 1; 801 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); 802 803 for (;;) { 804 console_poll[MONITOR_FD].revents = 0; 805 console_poll[VIRTIO_CONSOLE_FD].revents = 0; 806 err = poll(console_poll, MAX_CONSOLE_FD, -1); 807 if (err < 0) { 808 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, 809 strerror(errno)); 810 continue; 811 } 812 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 813 err = wait_for_card_driver(mic, 814 mic->mic_console.virtio_console_fd, 815 VIRTIO_ID_CONSOLE); 816 if (err) { 817 mpsslog("%s %s %d Exiting...\n", 818 mic->name, __func__, __LINE__); 819 break; 820 } 821 } 822 823 if (console_poll[MONITOR_FD].revents & POLLIN) { 824 copy.iov = iov0; 825 len = readv(pty_fd, copy.iov, copy.iovcnt); 826 if (len > 0) { 827 #ifdef DEBUG 828 disp_iovec(mic, copy, __func__, __LINE__); 829 mpsslog("%s %s %d read from tap 0x%lx\n", 830 mic->name, __func__, __LINE__, 831 len); 832 #endif 833 spin_for_descriptors(mic, &tx_vr); 834 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, 835 ©, len); 836 837 err = mic_virtio_copy(mic, 838 mic->mic_console.virtio_console_fd, 839 &tx_vr, ©); 840 if (err < 0) { 841 mpsslog("%s %s %d mic_virtio_copy %s\n", 842 mic->name, __func__, __LINE__, 843 strerror(errno)); 844 } 845 if (!err) 846 verify_out_len(mic, ©); 847 #ifdef DEBUG 848 disp_iovec(mic, copy, __func__, __LINE__); 849 mpsslog("%s %s %d wrote to net 0x%lx\n", 850 mic->name, __func__, __LINE__, 851 sum_iovec_len(copy)); 852 #endif 853 /* Reinitialize IOV for next run */ 854 iov0->iov_len = PAGE_SIZE; 855 } else if (len < 0) { 856 disp_iovec(mic, ©, __func__, __LINE__); 857 mpsslog("%s %s %d read failed %s ", 858 mic->name, __func__, __LINE__, 859 strerror(errno)); 860 mpsslog("cnt %d sum %zd\n", 861 copy.iovcnt, sum_iovec_len(©)); 862 } 863 } 864 865 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { 866 while (rx_vr.info->avail_idx != 867 le16toh(rx_vr.vr.avail->idx)) { 868 copy.iov = iov1; 869 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, 870 ©, PAGE_SIZE); 871 872 err = mic_virtio_copy(mic, 873 mic->mic_console.virtio_console_fd, 874 &rx_vr, ©); 875 if (!err) { 876 /* Set the correct output iov_len */ 877 iov1->iov_len = copy.out_len; 878 verify_out_len(mic, ©); 879 #ifdef DEBUG 880 disp_iovec(mic, copy, __func__, 881 __LINE__); 882 mpsslog("%s %s %d ", 883 mic->name, __func__, __LINE__); 884 mpsslog("read from net 0x%lx\n", 885 sum_iovec_len(copy)); 886 #endif 887 len = writev(pty_fd, 888 copy.iov, copy.iovcnt); 889 if (len != sum_iovec_len(©)) { 890 mpsslog("Tun write failed %s ", 891 strerror(errno)); 892 mpsslog("len 0x%zx ", len); 893 mpsslog("read_len 0x%zx\n", 894 sum_iovec_len(©)); 895 } else { 896 #ifdef DEBUG 897 disp_iovec(mic, copy, __func__, 898 __LINE__); 899 mpsslog("%s %s %d ", 900 mic->name, __func__, 901 __LINE__); 902 mpsslog("wrote to tap 0x%lx\n", 903 len); 904 #endif 905 } 906 } else { 907 mpsslog("%s %s %d mic_virtio_copy %s\n", 908 mic->name, __func__, __LINE__, 909 strerror(errno)); 910 break; 911 } 912 } 913 } 914 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) 915 mpsslog("%s: %s: POLLERR\n", __func__, mic->name); 916 } 917 _close_pty: 918 close(pty_fd); 919 _return: 920 pthread_exit(NULL); 921 } 922 923 static void 924 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) 925 { 926 char path[PATH_MAX]; 927 int fd, err; 928 929 snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id); 930 fd = open(path, O_RDWR); 931 if (fd < 0) { 932 mpsslog("Could not open %s %s\n", path, strerror(errno)); 933 return; 934 } 935 936 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); 937 if (err < 0) { 938 mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); 939 close(fd); 940 return; 941 } 942 switch (dd->type) { 943 case VIRTIO_ID_NET: 944 mic->mic_net.virtio_net_fd = fd; 945 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); 946 break; 947 case VIRTIO_ID_CONSOLE: 948 mic->mic_console.virtio_console_fd = fd; 949 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); 950 break; 951 case VIRTIO_ID_BLOCK: 952 mic->mic_virtblk.virtio_block_fd = fd; 953 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); 954 break; 955 } 956 } 957 958 static bool 959 set_backend_file(struct mic_info *mic) 960 { 961 FILE *config; 962 char buff[PATH_MAX], *line, *evv, *p; 963 964 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); 965 config = fopen(buff, "r"); 966 if (config == NULL) 967 return false; 968 do { /* look for "virtblk_backend=XXXX" */ 969 line = fgets(buff, PATH_MAX, config); 970 if (line == NULL) 971 break; 972 if (*line == '#') 973 continue; 974 p = strchr(line, '\n'); 975 if (p) 976 *p = '\0'; 977 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); 978 fclose(config); 979 if (line == NULL) 980 return false; 981 evv = strchr(line, '='); 982 if (evv == NULL) 983 return false; 984 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); 985 if (mic->mic_virtblk.backend_file == NULL) { 986 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); 987 return false; 988 } 989 strcpy(mic->mic_virtblk.backend_file, evv + 1); 990 return true; 991 } 992 993 #define SECTOR_SIZE 512 994 static bool 995 set_backend_size(struct mic_info *mic) 996 { 997 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, 998 SEEK_END); 999 if (mic->mic_virtblk.backend_size < 0) { 1000 mpsslog("%s: can't seek: %s\n", 1001 mic->name, mic->mic_virtblk.backend_file); 1002 return false; 1003 } 1004 virtblk_dev_page.blk_config.capacity = 1005 mic->mic_virtblk.backend_size / SECTOR_SIZE; 1006 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) 1007 virtblk_dev_page.blk_config.capacity++; 1008 1009 virtblk_dev_page.blk_config.capacity = 1010 htole64(virtblk_dev_page.blk_config.capacity); 1011 1012 return true; 1013 } 1014 1015 static bool 1016 open_backend(struct mic_info *mic) 1017 { 1018 if (!set_backend_file(mic)) 1019 goto _error_exit; 1020 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); 1021 if (mic->mic_virtblk.backend < 0) { 1022 mpsslog("%s: can't open: %s\n", mic->name, 1023 mic->mic_virtblk.backend_file); 1024 goto _error_free; 1025 } 1026 if (!set_backend_size(mic)) 1027 goto _error_close; 1028 mic->mic_virtblk.backend_addr = mmap(NULL, 1029 mic->mic_virtblk.backend_size, 1030 PROT_READ|PROT_WRITE, MAP_SHARED, 1031 mic->mic_virtblk.backend, 0L); 1032 if (mic->mic_virtblk.backend_addr == MAP_FAILED) { 1033 mpsslog("%s: can't map: %s %s\n", 1034 mic->name, mic->mic_virtblk.backend_file, 1035 strerror(errno)); 1036 goto _error_close; 1037 } 1038 return true; 1039 1040 _error_close: 1041 close(mic->mic_virtblk.backend); 1042 _error_free: 1043 free(mic->mic_virtblk.backend_file); 1044 _error_exit: 1045 return false; 1046 } 1047 1048 static void 1049 close_backend(struct mic_info *mic) 1050 { 1051 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); 1052 close(mic->mic_virtblk.backend); 1053 free(mic->mic_virtblk.backend_file); 1054 } 1055 1056 static bool 1057 start_virtblk(struct mic_info *mic, struct mic_vring *vring) 1058 { 1059 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { 1060 mpsslog("%s: blk_config is not 8 byte aligned.\n", 1061 mic->name); 1062 return false; 1063 } 1064 add_virtio_device(mic, &virtblk_dev_page.dd); 1065 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, 1066 VIRTIO_ID_BLOCK, vring, NULL, 1067 virtblk_dev_page.dd.num_vq)) { 1068 mpsslog("%s init_vr failed %s\n", 1069 mic->name, strerror(errno)); 1070 return false; 1071 } 1072 return true; 1073 } 1074 1075 static void 1076 stop_virtblk(struct mic_info *mic) 1077 { 1078 int vr_size, ret; 1079 1080 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, 1081 MIC_VIRTIO_RING_ALIGN) + 1082 sizeof(struct _mic_vring_info)); 1083 ret = munmap(mic->mic_virtblk.block_dp, 1084 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); 1085 if (ret < 0) 1086 mpsslog("%s munmap errno %d\n", mic->name, errno); 1087 close(mic->mic_virtblk.virtio_block_fd); 1088 } 1089 1090 static __u8 1091 header_error_check(struct vring_desc *desc) 1092 { 1093 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { 1094 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", 1095 __func__, __LINE__); 1096 return -EIO; 1097 } 1098 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { 1099 mpsslog("%s() %d: alone\n", 1100 __func__, __LINE__); 1101 return -EIO; 1102 } 1103 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { 1104 mpsslog("%s() %d: not read\n", 1105 __func__, __LINE__); 1106 return -EIO; 1107 } 1108 return 0; 1109 } 1110 1111 static int 1112 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) 1113 { 1114 struct iovec iovec; 1115 struct mic_copy_desc copy; 1116 1117 iovec.iov_len = sizeof(*hdr); 1118 iovec.iov_base = hdr; 1119 copy.iov = &iovec; 1120 copy.iovcnt = 1; 1121 copy.vr_idx = 0; /* only one vring on virtio_block */ 1122 copy.update_used = false; /* do not update used index */ 1123 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); 1124 } 1125 1126 static int 1127 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) 1128 { 1129 struct mic_copy_desc copy; 1130 1131 copy.iov = iovec; 1132 copy.iovcnt = iovcnt; 1133 copy.vr_idx = 0; /* only one vring on virtio_block */ 1134 copy.update_used = false; /* do not update used index */ 1135 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); 1136 } 1137 1138 static __u8 1139 status_error_check(struct vring_desc *desc) 1140 { 1141 if (le32toh(desc->len) != sizeof(__u8)) { 1142 mpsslog("%s() %d: length is not sizeof(status)\n", 1143 __func__, __LINE__); 1144 return -EIO; 1145 } 1146 return 0; 1147 } 1148 1149 static int 1150 write_status(int fd, __u8 *status) 1151 { 1152 struct iovec iovec; 1153 struct mic_copy_desc copy; 1154 1155 iovec.iov_base = status; 1156 iovec.iov_len = sizeof(*status); 1157 copy.iov = &iovec; 1158 copy.iovcnt = 1; 1159 copy.vr_idx = 0; /* only one vring on virtio_block */ 1160 copy.update_used = true; /* Update used index */ 1161 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); 1162 } 1163 1164 #ifndef VIRTIO_BLK_T_GET_ID 1165 #define VIRTIO_BLK_T_GET_ID 8 1166 #endif 1167 1168 static void * 1169 virtio_block(void *arg) 1170 { 1171 struct mic_info *mic = (struct mic_info *)arg; 1172 int ret; 1173 struct pollfd block_poll; 1174 struct mic_vring vring; 1175 __u16 avail_idx; 1176 __u32 desc_idx; 1177 struct vring_desc *desc; 1178 struct iovec *iovec, *piov; 1179 __u8 status; 1180 __u32 buffer_desc_idx; 1181 struct virtio_blk_outhdr hdr; 1182 void *fos; 1183 1184 for (;;) { /* forever */ 1185 if (!open_backend(mic)) { /* No virtblk */ 1186 for (mic->mic_virtblk.signaled = 0; 1187 !mic->mic_virtblk.signaled;) 1188 sleep(1); 1189 continue; 1190 } 1191 1192 /* backend file is specified. */ 1193 if (!start_virtblk(mic, &vring)) 1194 goto _close_backend; 1195 iovec = malloc(sizeof(*iovec) * 1196 le32toh(virtblk_dev_page.blk_config.seg_max)); 1197 if (!iovec) { 1198 mpsslog("%s: can't alloc iovec: %s\n", 1199 mic->name, strerror(ENOMEM)); 1200 goto _stop_virtblk; 1201 } 1202 1203 block_poll.fd = mic->mic_virtblk.virtio_block_fd; 1204 block_poll.events = POLLIN; 1205 for (mic->mic_virtblk.signaled = 0; 1206 !mic->mic_virtblk.signaled;) { 1207 block_poll.revents = 0; 1208 /* timeout in 1 sec to see signaled */ 1209 ret = poll(&block_poll, 1, 1000); 1210 if (ret < 0) { 1211 mpsslog("%s %d: poll failed: %s\n", 1212 __func__, __LINE__, 1213 strerror(errno)); 1214 continue; 1215 } 1216 1217 if (!(block_poll.revents & POLLIN)) { 1218 #ifdef DEBUG 1219 mpsslog("%s %d: block_poll.revents=0x%x\n", 1220 __func__, __LINE__, block_poll.revents); 1221 #endif 1222 continue; 1223 } 1224 1225 /* POLLIN */ 1226 while (vring.info->avail_idx != 1227 le16toh(vring.vr.avail->idx)) { 1228 /* read header element */ 1229 avail_idx = 1230 vring.info->avail_idx & 1231 (vring.vr.num - 1); 1232 desc_idx = le16toh( 1233 vring.vr.avail->ring[avail_idx]); 1234 desc = &vring.vr.desc[desc_idx]; 1235 #ifdef DEBUG 1236 mpsslog("%s() %d: avail_idx=%d ", 1237 __func__, __LINE__, 1238 vring.info->avail_idx); 1239 mpsslog("vring.vr.num=%d desc=%p\n", 1240 vring.vr.num, desc); 1241 #endif 1242 status = header_error_check(desc); 1243 ret = read_header( 1244 mic->mic_virtblk.virtio_block_fd, 1245 &hdr, desc_idx); 1246 if (ret < 0) { 1247 mpsslog("%s() %d %s: ret=%d %s\n", 1248 __func__, __LINE__, 1249 mic->name, ret, 1250 strerror(errno)); 1251 break; 1252 } 1253 /* buffer element */ 1254 piov = iovec; 1255 status = 0; 1256 fos = mic->mic_virtblk.backend_addr + 1257 (hdr.sector * SECTOR_SIZE); 1258 buffer_desc_idx = next_desc(desc); 1259 desc_idx = buffer_desc_idx; 1260 for (desc = &vring.vr.desc[buffer_desc_idx]; 1261 desc->flags & VRING_DESC_F_NEXT; 1262 desc_idx = next_desc(desc), 1263 desc = &vring.vr.desc[desc_idx]) { 1264 piov->iov_len = desc->len; 1265 piov->iov_base = fos; 1266 piov++; 1267 fos += desc->len; 1268 } 1269 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */ 1270 if (hdr.type & ~(VIRTIO_BLK_T_OUT | 1271 VIRTIO_BLK_T_GET_ID)) { 1272 /* 1273 VIRTIO_BLK_T_IN - does not do 1274 anything. Probably for documenting. 1275 VIRTIO_BLK_T_SCSI_CMD - for 1276 virtio_scsi. 1277 VIRTIO_BLK_T_FLUSH - turned off in 1278 config space. 1279 VIRTIO_BLK_T_BARRIER - defined but not 1280 used in anywhere. 1281 */ 1282 mpsslog("%s() %d: type %x ", 1283 __func__, __LINE__, 1284 hdr.type); 1285 mpsslog("is not supported\n"); 1286 status = -ENOTSUP; 1287 1288 } else { 1289 ret = transfer_blocks( 1290 mic->mic_virtblk.virtio_block_fd, 1291 iovec, 1292 piov - iovec); 1293 if (ret < 0 && 1294 status != 0) 1295 status = ret; 1296 } 1297 /* write status and update used pointer */ 1298 if (status != 0) 1299 status = status_error_check(desc); 1300 ret = write_status( 1301 mic->mic_virtblk.virtio_block_fd, 1302 &status); 1303 #ifdef DEBUG 1304 mpsslog("%s() %d: write status=%d on desc=%p\n", 1305 __func__, __LINE__, 1306 status, desc); 1307 #endif 1308 } 1309 } 1310 free(iovec); 1311 _stop_virtblk: 1312 stop_virtblk(mic); 1313 _close_backend: 1314 close_backend(mic); 1315 } /* forever */ 1316 1317 pthread_exit(NULL); 1318 } 1319 1320 static void 1321 reset(struct mic_info *mic) 1322 { 1323 #define RESET_TIMEOUT 120 1324 int i = RESET_TIMEOUT; 1325 setsysfs(mic->name, "state", "reset"); 1326 while (i) { 1327 char *state; 1328 state = readsysfs(mic->name, "state"); 1329 if (!state) 1330 goto retry; 1331 mpsslog("%s: %s %d state %s\n", 1332 mic->name, __func__, __LINE__, state); 1333 1334 if (!strcmp(state, "ready")) { 1335 free(state); 1336 break; 1337 } 1338 free(state); 1339 retry: 1340 sleep(1); 1341 i--; 1342 } 1343 } 1344 1345 static int 1346 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) 1347 { 1348 if (!strcmp(shutdown_status, "nop")) 1349 return MIC_NOP; 1350 if (!strcmp(shutdown_status, "crashed")) 1351 return MIC_CRASHED; 1352 if (!strcmp(shutdown_status, "halted")) 1353 return MIC_HALTED; 1354 if (!strcmp(shutdown_status, "poweroff")) 1355 return MIC_POWER_OFF; 1356 if (!strcmp(shutdown_status, "restart")) 1357 return MIC_RESTART; 1358 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); 1359 /* Invalid state */ 1360 assert(0); 1361 }; 1362 1363 static int get_mic_state(struct mic_info *mic) 1364 { 1365 char *state = NULL; 1366 enum mic_states mic_state; 1367 1368 while (!state) { 1369 state = readsysfs(mic->name, "state"); 1370 sleep(1); 1371 } 1372 mpsslog("%s: %s %d state %s\n", 1373 mic->name, __func__, __LINE__, state); 1374 1375 if (!strcmp(state, "ready")) { 1376 mic_state = MIC_READY; 1377 } else if (!strcmp(state, "booting")) { 1378 mic_state = MIC_BOOTING; 1379 } else if (!strcmp(state, "online")) { 1380 mic_state = MIC_ONLINE; 1381 } else if (!strcmp(state, "shutting_down")) { 1382 mic_state = MIC_SHUTTING_DOWN; 1383 } else if (!strcmp(state, "reset_failed")) { 1384 mic_state = MIC_RESET_FAILED; 1385 } else if (!strcmp(state, "resetting")) { 1386 mic_state = MIC_RESETTING; 1387 } else { 1388 mpsslog("%s: BUG invalid state %s\n", mic->name, state); 1389 assert(0); 1390 } 1391 1392 free(state); 1393 return mic_state; 1394 }; 1395 1396 static void mic_handle_shutdown(struct mic_info *mic) 1397 { 1398 #define SHUTDOWN_TIMEOUT 60 1399 int i = SHUTDOWN_TIMEOUT; 1400 char *shutdown_status; 1401 while (i) { 1402 shutdown_status = readsysfs(mic->name, "shutdown_status"); 1403 if (!shutdown_status) { 1404 sleep(1); 1405 continue; 1406 } 1407 mpsslog("%s: %s %d shutdown_status %s\n", 1408 mic->name, __func__, __LINE__, shutdown_status); 1409 switch (get_mic_shutdown_status(mic, shutdown_status)) { 1410 case MIC_RESTART: 1411 mic->restart = 1; 1412 case MIC_HALTED: 1413 case MIC_POWER_OFF: 1414 case MIC_CRASHED: 1415 free(shutdown_status); 1416 goto reset; 1417 default: 1418 break; 1419 } 1420 free(shutdown_status); 1421 sleep(1); 1422 i--; 1423 } 1424 reset: 1425 if (!i) 1426 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n", 1427 mic->name, __func__, __LINE__, shutdown_status); 1428 reset(mic); 1429 } 1430 1431 static int open_state_fd(struct mic_info *mic) 1432 { 1433 char pathname[PATH_MAX]; 1434 int fd; 1435 1436 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", 1437 MICSYSFSDIR, mic->name, "state"); 1438 1439 fd = open(pathname, O_RDONLY); 1440 if (fd < 0) 1441 mpsslog("%s: opening file %s failed %s\n", 1442 mic->name, pathname, strerror(errno)); 1443 return fd; 1444 } 1445 1446 static int block_till_state_change(int fd, struct mic_info *mic) 1447 { 1448 struct pollfd ufds[1]; 1449 char value[PAGE_SIZE]; 1450 int ret; 1451 1452 ufds[0].fd = fd; 1453 ufds[0].events = POLLERR | POLLPRI; 1454 ret = poll(ufds, 1, -1); 1455 if (ret < 0) { 1456 mpsslog("%s: %s %d poll failed %s\n", 1457 mic->name, __func__, __LINE__, strerror(errno)); 1458 return ret; 1459 } 1460 1461 ret = lseek(fd, 0, SEEK_SET); 1462 if (ret < 0) { 1463 mpsslog("%s: %s %d Failed to seek to 0: %s\n", 1464 mic->name, __func__, __LINE__, strerror(errno)); 1465 return ret; 1466 } 1467 1468 ret = read(fd, value, sizeof(value)); 1469 if (ret < 0) { 1470 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n", 1471 mic->name, __func__, __LINE__, strerror(errno)); 1472 return ret; 1473 } 1474 1475 return 0; 1476 } 1477 1478 static void * 1479 mic_config(void *arg) 1480 { 1481 struct mic_info *mic = (struct mic_info *)arg; 1482 int fd, ret, stat = 0; 1483 1484 fd = open_state_fd(mic); 1485 if (fd < 0) { 1486 mpsslog("%s: %s %d open state fd failed %s\n", 1487 mic->name, __func__, __LINE__, strerror(errno)); 1488 goto exit; 1489 } 1490 1491 do { 1492 ret = block_till_state_change(fd, mic); 1493 if (ret < 0) { 1494 mpsslog("%s: %s %d block_till_state_change error %s\n", 1495 mic->name, __func__, __LINE__, strerror(errno)); 1496 goto close_exit; 1497 } 1498 1499 switch (get_mic_state(mic)) { 1500 case MIC_SHUTTING_DOWN: 1501 mic_handle_shutdown(mic); 1502 break; 1503 case MIC_READY: 1504 case MIC_RESET_FAILED: 1505 ret = kill(mic->pid, SIGTERM); 1506 mpsslog("%s: %s %d kill pid %d ret %d\n", 1507 mic->name, __func__, __LINE__, 1508 mic->pid, ret); 1509 if (!ret) { 1510 ret = waitpid(mic->pid, &stat, 1511 WIFSIGNALED(stat)); 1512 mpsslog("%s: %s %d waitpid ret %d pid %d\n", 1513 mic->name, __func__, __LINE__, 1514 ret, mic->pid); 1515 } 1516 if (mic->boot_on_resume) { 1517 setsysfs(mic->name, "state", "boot"); 1518 mic->boot_on_resume = 0; 1519 } 1520 goto close_exit; 1521 default: 1522 break; 1523 } 1524 } while (1); 1525 1526 close_exit: 1527 close(fd); 1528 exit: 1529 init_mic(mic); 1530 pthread_exit(NULL); 1531 } 1532 1533 static void 1534 set_cmdline(struct mic_info *mic) 1535 { 1536 char buffer[PATH_MAX]; 1537 int len; 1538 1539 len = snprintf(buffer, PATH_MAX, 1540 "clocksource=tsc highres=off nohz=off "); 1541 len += snprintf(buffer + len, PATH_MAX - len, 1542 "cpufreq_on;corec6_off;pc3_off;pc6_off "); 1543 len += snprintf(buffer + len, PATH_MAX - len, 1544 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", 1545 mic->id + 1); 1546 1547 setsysfs(mic->name, "cmdline", buffer); 1548 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); 1549 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1); 1550 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); 1551 } 1552 1553 static void 1554 set_log_buf_info(struct mic_info *mic) 1555 { 1556 int fd; 1557 off_t len; 1558 char system_map[] = "/lib/firmware/mic/System.map"; 1559 char *map, *temp, log_buf[17] = {'\0'}; 1560 1561 fd = open(system_map, O_RDONLY); 1562 if (fd < 0) { 1563 mpsslog("%s: Opening System.map failed: %d\n", 1564 mic->name, errno); 1565 return; 1566 } 1567 len = lseek(fd, 0, SEEK_END); 1568 if (len < 0) { 1569 mpsslog("%s: Reading System.map size failed: %d\n", 1570 mic->name, errno); 1571 close(fd); 1572 return; 1573 } 1574 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); 1575 if (map == MAP_FAILED) { 1576 mpsslog("%s: mmap of System.map failed: %d\n", 1577 mic->name, errno); 1578 close(fd); 1579 return; 1580 } 1581 temp = strstr(map, "__log_buf"); 1582 if (!temp) { 1583 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); 1584 munmap(map, len); 1585 close(fd); 1586 return; 1587 } 1588 strncpy(log_buf, temp - 19, 16); 1589 setsysfs(mic->name, "log_buf_addr", log_buf); 1590 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); 1591 temp = strstr(map, "log_buf_len"); 1592 if (!temp) { 1593 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); 1594 munmap(map, len); 1595 close(fd); 1596 return; 1597 } 1598 strncpy(log_buf, temp - 19, 16); 1599 setsysfs(mic->name, "log_buf_len", log_buf); 1600 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); 1601 munmap(map, len); 1602 close(fd); 1603 } 1604 1605 static void 1606 change_virtblk_backend(int x, siginfo_t *siginfo, void *p) 1607 { 1608 struct mic_info *mic; 1609 1610 for (mic = mic_list.next; mic != NULL; mic = mic->next) 1611 mic->mic_virtblk.signaled = 1/* true */; 1612 } 1613 1614 static void 1615 set_mic_boot_params(struct mic_info *mic) 1616 { 1617 set_log_buf_info(mic); 1618 set_cmdline(mic); 1619 } 1620 1621 static void * 1622 init_mic(void *arg) 1623 { 1624 struct mic_info *mic = (struct mic_info *)arg; 1625 struct sigaction ignore = { 1626 .sa_flags = 0, 1627 .sa_handler = SIG_IGN 1628 }; 1629 struct sigaction act = { 1630 .sa_flags = SA_SIGINFO, 1631 .sa_sigaction = change_virtblk_backend, 1632 }; 1633 char buffer[PATH_MAX]; 1634 int err, fd; 1635 1636 /* 1637 * Currently, one virtio block device is supported for each MIC card 1638 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. 1639 * The signal informs the virtio block backend about a change in the 1640 * configuration file which specifies the virtio backend file name on 1641 * the host. Virtio block backend then re-reads the configuration file 1642 * and switches to the new block device. This signalling mechanism may 1643 * not be required once multiple virtio block devices are supported by 1644 * the MIC daemon. 1645 */ 1646 sigaction(SIGUSR1, &ignore, NULL); 1647 retry: 1648 fd = open_state_fd(mic); 1649 if (fd < 0) { 1650 mpsslog("%s: %s %d open state fd failed %s\n", 1651 mic->name, __func__, __LINE__, strerror(errno)); 1652 sleep(2); 1653 goto retry; 1654 } 1655 1656 if (mic->restart) { 1657 snprintf(buffer, PATH_MAX, "boot"); 1658 setsysfs(mic->name, "state", buffer); 1659 mpsslog("%s restarting mic %d\n", 1660 mic->name, mic->restart); 1661 mic->restart = 0; 1662 } 1663 1664 while (1) { 1665 while (block_till_state_change(fd, mic)) { 1666 mpsslog("%s: %s %d block_till_state_change error %s\n", 1667 mic->name, __func__, __LINE__, strerror(errno)); 1668 sleep(2); 1669 continue; 1670 } 1671 1672 if (get_mic_state(mic) == MIC_BOOTING) 1673 break; 1674 } 1675 1676 mic->pid = fork(); 1677 switch (mic->pid) { 1678 case 0: 1679 add_virtio_device(mic, &virtcons_dev_page.dd); 1680 add_virtio_device(mic, &virtnet_dev_page.dd); 1681 err = pthread_create(&mic->mic_console.console_thread, NULL, 1682 virtio_console, mic); 1683 if (err) 1684 mpsslog("%s virtcons pthread_create failed %s\n", 1685 mic->name, strerror(err)); 1686 err = pthread_create(&mic->mic_net.net_thread, NULL, 1687 virtio_net, mic); 1688 if (err) 1689 mpsslog("%s virtnet pthread_create failed %s\n", 1690 mic->name, strerror(err)); 1691 err = pthread_create(&mic->mic_virtblk.block_thread, NULL, 1692 virtio_block, mic); 1693 if (err) 1694 mpsslog("%s virtblk pthread_create failed %s\n", 1695 mic->name, strerror(err)); 1696 sigemptyset(&act.sa_mask); 1697 err = sigaction(SIGUSR1, &act, NULL); 1698 if (err) 1699 mpsslog("%s sigaction SIGUSR1 failed %s\n", 1700 mic->name, strerror(errno)); 1701 while (1) 1702 sleep(60); 1703 case -1: 1704 mpsslog("fork failed MIC name %s id %d errno %d\n", 1705 mic->name, mic->id, errno); 1706 break; 1707 default: 1708 err = pthread_create(&mic->config_thread, NULL, 1709 mic_config, mic); 1710 if (err) 1711 mpsslog("%s mic_config pthread_create failed %s\n", 1712 mic->name, strerror(err)); 1713 } 1714 1715 return NULL; 1716 } 1717 1718 static void 1719 start_daemon(void) 1720 { 1721 struct mic_info *mic; 1722 int err; 1723 1724 for (mic = mic_list.next; mic; mic = mic->next) { 1725 set_mic_boot_params(mic); 1726 err = pthread_create(&mic->init_thread, NULL, init_mic, mic); 1727 if (err) 1728 mpsslog("%s init_mic pthread_create failed %s\n", 1729 mic->name, strerror(err)); 1730 } 1731 1732 while (1) 1733 sleep(60); 1734 } 1735 1736 static int 1737 init_mic_list(void) 1738 { 1739 struct mic_info *mic = &mic_list; 1740 struct dirent *file; 1741 DIR *dp; 1742 int cnt = 0; 1743 1744 dp = opendir(MICSYSFSDIR); 1745 if (!dp) 1746 return 0; 1747 1748 while ((file = readdir(dp)) != NULL) { 1749 if (!strncmp(file->d_name, "mic", 3)) { 1750 mic->next = calloc(1, sizeof(struct mic_info)); 1751 if (mic->next) { 1752 mic = mic->next; 1753 mic->id = atoi(&file->d_name[3]); 1754 mic->name = malloc(strlen(file->d_name) + 16); 1755 if (mic->name) 1756 strcpy(mic->name, file->d_name); 1757 mpsslog("MIC name %s id %d\n", mic->name, 1758 mic->id); 1759 cnt++; 1760 } 1761 } 1762 } 1763 1764 closedir(dp); 1765 return cnt; 1766 } 1767 1768 void 1769 mpsslog(char *format, ...) 1770 { 1771 va_list args; 1772 char buffer[4096]; 1773 char ts[52], *ts1; 1774 time_t t; 1775 1776 if (logfp == NULL) 1777 return; 1778 1779 va_start(args, format); 1780 vsprintf(buffer, format, args); 1781 va_end(args); 1782 1783 time(&t); 1784 ts1 = ctime_r(&t, ts); 1785 ts1[strlen(ts1) - 1] = '\0'; 1786 fprintf(logfp, "%s: %s", ts1, buffer); 1787 1788 fflush(logfp); 1789 } 1790 1791 int 1792 main(int argc, char *argv[]) 1793 { 1794 int cnt; 1795 pid_t pid; 1796 1797 myname = argv[0]; 1798 1799 logfp = fopen(LOGFILE_NAME, "a+"); 1800 if (!logfp) { 1801 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); 1802 exit(1); 1803 } 1804 pid = fork(); 1805 switch (pid) { 1806 case 0: 1807 break; 1808 case -1: 1809 exit(2); 1810 default: 1811 exit(0); 1812 } 1813 1814 mpsslog("MIC Daemon start\n"); 1815 1816 cnt = init_mic_list(); 1817 if (cnt == 0) { 1818 mpsslog("MIC module not loaded\n"); 1819 exit(3); 1820 } 1821 mpsslog("MIC found %d devices\n", cnt); 1822 1823 start_daemon(); 1824 1825 exit(0); 1826 }