About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Documentation / ia64 / err_inject.txt


Based on kernel version 4.16.1. Page generated on 2018-04-09 11:53 EST.

1	
2	IPF Machine Check (MC) error inject tool
3	========================================
4	
5	IPF Machine Check (MC) error inject tool is used to inject MC
6	errors from Linux. The tool is a test bed for IPF MC work flow including
7	hardware correctable error handling, OS recoverable error handling, MC
8	event logging, etc.
9	
10	The tool includes two parts: a kernel driver and a user application
11	sample. The driver provides an interface to PAL to inject errors
12	and query error injection capabilities. The driver code is in
13	arch/ia64/kernel/err_inject.c. The application sample (shown below)
14	provides a combination of various errors and calls the driver's interface
15	(sysfs interface) to inject errors or query error injection capabilities.
16	
17	The tool can be used to test Intel IPF machine MC handling capabilities.
18	It's especially useful for people who cannot access a hardware MC injection
19	tool to inject errors. It's also very useful to integrate with other
20	software test suites to do stress testing on IPF.
21	
22	Below is a sample application as part of the whole tool. The sample
23	can be used as a working test tool. Or it can be expanded to include
24	more features. It can also be integrated into a library or another user
25	application for more thorough testing.
26	
27	The sample application takes err.conf as error configuration input. GCC
28	compiles the code. After you install err_inject driver, you can run
29	this sample application to inject errors.
30	
31	Errata: Itanium 2 Processors Specification Update lists some errata against
32	the pal_mc_error_inject PAL procedure. The following err.conf has been tested
33	on latest Montecito PAL.
34	
35	err.conf:
36	
37	#This is configuration file for err_inject_tool.
38	#The format of each line is:
39	#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
40	#where
41	#	cpu: logical cpu number the error will be injected on.
42	#	loop: number of times the error will be injected.
43	#	interval: in seconds; how often one error is injected.
44	#	err_type_info, err_struct_info: PAL parameters.
45	#
46	#Note: All values are hex w/o or w/ 0x prefix.
47	
48	
49	#On cpu2, inject only total 0x10 errors, interval 5 seconds
50	#corrected, data cache, hier-2, physical addr(assigned by tool code).
51	#working on Montecito latest PAL.
52	2, 10, 5, 4101, 95
53	
54	#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
55	#corrected, data cache, hier-2, physical addr(assigned by tool code).
56	#working on Montecito latest PAL.
57	4, 10, 5, 4109, 95
58	
59	#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
60	#recoverable, DTR0, hier-2.
61	#working on Montecito latest PAL.
62	0xf, 0x10, 5, 4249, 15
63	
64	The sample application source code:
65	
66	err_injection_tool.c:
67	
68	/*
69	 * This program is free software; you can redistribute it and/or modify
70	 * it under the terms of the GNU General Public License as published by
71	 * the Free Software Foundation; either version 2 of the License, or
72	 * (at your option) any later version.
73	 *
74	 * This program is distributed in the hope that it will be useful, but
75	 * WITHOUT ANY WARRANTY; without even the implied warranty of
76	 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
77	 * NON INFRINGEMENT.  See the GNU General Public License for more
78	 * details.
79	 *
80	 * You should have received a copy of the GNU General Public License
81	 * along with this program; if not, write to the Free Software
82	 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
83	 *
84	 * Copyright (C) 2006 Intel Co
85	 *	Fenghua Yu <fenghua.yu@intel.com>
86	 *
87	 */
88	#include <sys/types.h>
89	#include <sys/stat.h>
90	#include <fcntl.h>
91	#include <stdio.h>
92	#include <sched.h>
93	#include <unistd.h>
94	#include <stdlib.h>
95	#include <stdarg.h>
96	#include <string.h>
97	#include <errno.h>
98	#include <time.h>
99	#include <sys/ipc.h>
100	#include <sys/sem.h>
101	#include <sys/wait.h>
102	#include <sys/mman.h>
103	#include <sys/shm.h>
104	
/* Sizes of the scratch buffers used for file names and formatted text. */
#define MAX_FN_SIZE 		256
#define MAX_BUF_SIZE 		256
#define DATA_BUF_SIZE 		256
/* Upper bound for the per-cpu tables (key[], semid[], affinity mask). */
#define NR_CPUS 		512
/* Maximum number of injection tasks taken from err.conf. */
#define MAX_TASK_NUM		2048
#define MIN_INTERVAL		5	// seconds
#define	ERR_DATA_BUFFER_SIZE 	3	// Three 8-byte.
/* Number of mandatory fields on a parameter line. */
#define PARA_FIELD_NUM		5
/* Number of 64-bit words in the cpu affinity mask. */
#define MASK_SIZE		(NR_CPUS/64)
#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"

int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);

/* Non-zero enables the extra diagnostics printed through vbprintf(). */
int verbose;

/*
 * printf() that only prints when 'verbose' is set.  Wrapped in
 * do { } while (0) so it behaves as a single statement and cannot
 * capture an 'else' that follows it.
 */
#define vbprintf(...) do { if (verbose) printf(__VA_ARGS__); } while (0)
120	
121	int log_info(int cpu, const char *fmt, ...)
122	{
123		FILE *log;
124		char fn[MAX_FN_SIZE];
125		char buf[MAX_BUF_SIZE];
126		va_list args;
127	
128		sprintf(fn, "%d.log", cpu);
129		log=fopen(fn, "a+");
130		if (log==NULL) {
131			perror("Error open:");
132			return -1;
133		}
134	
135		va_start(args, fmt);
136		vprintf(fmt, args);
137		memset(buf, 0, MAX_BUF_SIZE);
138		vsprintf(buf, fmt, args);
139		va_end(args);
140	
141		fwrite(buf, sizeof(buf), 1, log);
142		fclose(log);
143	
144		return 0;
145	}
146	
/* Shorthand integer types; u64 is used as a 64-bit value throughout. */
typedef unsigned long u64;
typedef unsigned int  u32;

/*
 * err_type_info parameter: the same 64-bit word viewed either as
 * individual bit fields or as the raw value written to sysfs.
 */
typedef union err_type_info_u {
	struct {
		u64	mode		: 3;	/* bits 0-2 */
		u64	err_inj		: 3;	/* bits 3-5 */
		u64	err_sev		: 2;	/* bits 6-7 */
		u64	err_struct	: 5;	/* bits 8-12 */
		u64	struct_hier	: 3;	/* bits 13-15 */
		u64	reserved	: 48;	/* bits 16-63 */
	} err_type_info_u;
	u64	err_type_info;			/* raw value */
} err_type_info_t;
161	
162	typedef union err_struct_info_u {
163		struct {
164			u64	siv		: 1,	/* 0	 */
165				c_t		: 2,	/* 1-2	 */
166				cl_p		: 3,	/* 3-5	 */
167				cl_id		: 3,	/* 6-8	 */
168				cl_dp		: 1,	/* 9	 */
169				reserved1	: 22,	/* 10-31 */
170				tiv		: 1,	/* 32	 */
171				trigger		: 4,	/* 33-36 */
172				trigger_pl 	: 3,	/* 37-39 */
173				reserved2 	: 24;	/* 40-63 */
174		} err_struct_info_cache;
175		struct {
176			u64	siv		: 1,	/* 0	 */
177				tt		: 2,	/* 1-2	 */
178				tc_tr		: 2,	/* 3-4	 */
179				tr_slot		: 8,	/* 5-12	 */
180				reserved1	: 19,	/* 13-31 */
181				tiv		: 1,	/* 32	 */
182				trigger		: 4,	/* 33-36 */
183				trigger_pl 	: 3,	/* 37-39 */
184				reserved2 	: 24;	/* 40-63 */
185		} err_struct_info_tlb;
186		struct {
187			u64	siv		: 1,	/* 0	 */
188				regfile_id	: 4,	/* 1-4	 */
189				reg_num		: 7,	/* 5-11	 */
190				reserved1	: 20,	/* 12-31 */
191				tiv		: 1,	/* 32	 */
192				trigger		: 4,	/* 33-36 */
193				trigger_pl 	: 3,	/* 37-39 */
194				reserved2 	: 24;	/* 40-63 */
195		} err_struct_info_register;
196		struct {
197			u64	reserved;
198		} err_struct_info_bus_processor_interconnect;
199		u64	err_struct_info;
200	} err_struct_info_t;
201	
202	typedef union err_data_buffer_u {
203		struct {
204			u64	trigger_addr;		/* 0-63		*/
205			u64	inj_addr;		/* 64-127 	*/
206			u64	way		: 5,	/* 128-132	*/
207				index		: 20,	/* 133-152	*/
208						: 39;	/* 153-191	*/
209		} err_data_buffer_cache;
210		struct {
211			u64	trigger_addr;		/* 0-63		*/
212			u64	inj_addr;		/* 64-127 	*/
213			u64	way		: 5,	/* 128-132	*/
214				index		: 20,	/* 133-152	*/
215				reserved	: 39;	/* 153-191	*/
216		} err_data_buffer_tlb;
217		struct {
218			u64	trigger_addr;		/* 0-63		*/
219		} err_data_buffer_register;
220		struct {
221			u64	reserved;		/* 0-63		*/
222		} err_data_buffer_bus_processor_interconnect;
223		u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
224	} err_data_buffer_t;
225	
226	typedef union capabilities_u {
227		struct {
228			u64	i		: 1,
229				d		: 1,
230				rv		: 1,
231				tag		: 1,
232				data		: 1,
233				mesi		: 1,
234				dp		: 1,
235				reserved1	: 3,
236				pa		: 1,
237				va		: 1,
238				wi		: 1,
239				reserved2	: 20,
240				trigger		: 1,
241				trigger_pl	: 1,
242				reserved3	: 30;
243		} capabilities_cache;
244		struct {
245			u64	d		: 1,
246				i		: 1,
247				rv		: 1,
248				tc		: 1,
249				tr		: 1,
250				reserved1	: 27,
251				trigger		: 1,
252				trigger_pl	: 1,
253				reserved2	: 30;
254		} capabilities_tlb;
255		struct {
256			u64	gr_b0		: 1,
257				gr_b1		: 1,
258				fr		: 1,
259				br		: 1,
260				pr		: 1,
261				ar		: 1,
262				cr		: 1,
263				rr		: 1,
264				pkr		: 1,
265				dbr		: 1,
266				ibr		: 1,
267				pmc		: 1,
268				pmd		: 1,
269				reserved1	: 3,
270				regnum		: 1,
271				reserved2	: 15,
272				trigger		: 1,
273				trigger_pl	: 1,
274				reserved3	: 30;
275		} capabilities_register;
276		struct {
277			u64	reserved;
278		} capabilities_bus_processor_interconnect;
279	} capabilities_t;
280	
281	typedef struct resources_s {
282		u64	ibr0		: 1,
283			ibr2		: 1,
284			ibr4		: 1,
285			ibr6		: 1,
286			dbr0		: 1,
287			dbr2		: 1,
288			dbr4		: 1,
289			dbr6		: 1,
290			reserved	: 48;
291	} resources_t;
292	
293	
/* Report the system page size as given by sysconf(); -1 on failure. */
long get_page_size(void)
{
	return sysconf(_SC_PAGESIZE);
}

/* Page size, falling back to 0x4000 when sysconf() fails. */
#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
/* Size of the shared segment: two pages for each possible cpu. */
#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
/* Address the shared segment is attached at. */
#define SHM_VA 0x2000000100000000

/* Id and attach address of the shared segment set up by create_shm(). */
int shmid;
void *shmaddr;
306	
307	int create_shm(void)
308	{
309		key_t key;
310		char fn[MAX_FN_SIZE];
311	
312		/* cpu0 is always existing */
313		sprintf(fn, PATH_FORMAT, 0);
314		if ((key = ftok(fn, 's')) == -1) {
315			perror("ftok");
316			return -1;
317		}
318	
319		shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
320		if (shmid == -1) {
321			if (errno==EEXIST) {
322				shmid = shmget(key, SHM_SIZE, 0);
323				if (shmid == -1) {
324					perror("shmget");
325					return -1;
326				}
327			}
328			else {
329				perror("shmget");
330				return -1;
331			}
332		}
333		vbprintf("shmid=%d", shmid);
334	
335		/* connect to the segment: */
336		shmaddr = shmat(shmid, (void *)SHM_VA, 0);
337		if (shmaddr == (void*)-1) {
338			perror("shmat");
339			return -1;
340		}
341	
342		memset(shmaddr, 0, SHM_SIZE);
343		mlock(shmaddr, SHM_SIZE);
344	
345		return 0;
346	}
347	
348	int free_shm()
349	{
350		munlock(shmaddr, SHM_SIZE);
351	        shmdt(shmaddr);
352		semctl(shmid, 0, IPC_RMID);
353	
354		return 0;
355	}
356	
357	#ifdef _SEM_SEMUN_UNDEFINED
358	union semun
359	{
360		int val;
361		struct semid_ds *buf;
362		unsigned short int *array;
363		struct seminfo *__buf;
364	};
365	#endif
366	
367	u32 mode=1; /* 1: physical mode; 2: virtual mode. */
368	int one_lock=1;
369	key_t key[NR_CPUS];
370	int semid[NR_CPUS];
371	
372	int create_sem(int cpu)
373	{
374		union semun arg;
375		char fn[MAX_FN_SIZE];
376		int sid;
377	
378		sprintf(fn, PATH_FORMAT, cpu);
379		sprintf(fn, "%s/%s", fn, "err_type_info");
380		if ((key[cpu] = ftok(fn, 'e')) == -1) {
381			perror("ftok");
382			return -1;
383		}
384	
385		if (semid[cpu]!=0)
386			return 0;
387	
388		/* clear old semaphore */
389		if ((sid = semget(key[cpu], 1, 0)) != -1)
390			semctl(sid, 0, IPC_RMID);
391	
392		/* get one semaphore */
393		if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
394			perror("semget");
395			printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
396				(u64)key[cpu]);
397			return -1;
398		}
399	
400		vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
401			(u64)key[cpu]);
402		/* initialize the semaphore to 1: */
403		arg.val = 1;
404		if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
405			perror("semctl");
406			return -1;
407		}
408	
409		return 0;
410	}
411	
412	static int lock(int cpu)
413	{
414		struct sembuf lock;
415	
416		lock.sem_num = cpu;
417		lock.sem_op = 1;
418		semop(semid[cpu], &lock, 1);
419	
420	        return 0;
421	}
422	
423	static int unlock(int cpu)
424	{
425		struct sembuf unlock;
426	
427		unlock.sem_num = cpu;
428		unlock.sem_op = -1;
429		semop(semid[cpu], &unlock, 1);
430	
431	        return 0;
432	}
433	
434	void free_sem(int cpu)
435	{
436		semctl(semid[cpu], 0, IPC_RMID);
437	}
438	
439	int wr_multi(char *fn, unsigned long *data, int size)
440	{
441		int fd;
442		char buf[MAX_BUF_SIZE];
443		int ret;
444	
445		if (size==1)
446			sprintf(buf, "%lx", *data);
447		else if (size==3)
448			sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
449		else {
450			fprintf(stderr,"write to file with wrong size!\n");
451			return -1;
452		}
453	
454		fd=open(fn, O_RDWR);
455		if (!fd) {
456			perror("Error:");
457			return -1;
458		}
459		ret=write(fd, buf, sizeof(buf));
460		close(fd);
461		return ret;
462	}
463	
/* Write a single value to 'fn'; thin wrapper around wr_multi(). */
int wr(char *fn, unsigned long data)
{
	unsigned long value = data;

	return wr_multi(fn, &value, 1);
}
468	
469	int rd(char *fn, unsigned long *data)
470	{
471		int fd;
472		char buf[MAX_BUF_SIZE];
473	
474		fd=open(fn, O_RDONLY);
475		if (fd<0) {
476			perror("Error:");
477			return -1;
478		}
479		read(fd, buf, MAX_BUF_SIZE);
480		*data=strtoul(buf, NULL, 16);
481		close(fd);
482		return 0;
483	}
484	
485	int rd_status(char *path, int *status)
486	{
487		char fn[MAX_FN_SIZE];
488		sprintf(fn, "%s/status", path);
489		if (rd(fn, (u64*)status)<0) {
490			perror("status reading error.\n");
491			return -1;
492		}
493	
494		return 0;
495	}
496	
497	int rd_capabilities(char *path, u64 *capabilities)
498	{
499		char fn[MAX_FN_SIZE];
500		sprintf(fn, "%s/capabilities", path);
501		if (rd(fn, capabilities)<0) {
502			perror("capabilities reading error.\n");
503			return -1;
504		}
505	
506		return 0;
507	}
508	
509	int rd_all(char *path)
510	{
511		unsigned long err_type_info, err_struct_info, err_data_buffer;
512		int status;
513		unsigned long capabilities, resources;
514		char fn[MAX_FN_SIZE];
515	
516		sprintf(fn, "%s/err_type_info", path);
517		if (rd(fn, &err_type_info)<0) {
518			perror("err_type_info reading error.\n");
519			return -1;
520		}
521		printf("err_type_info=%lx\n", err_type_info);
522	
523		sprintf(fn, "%s/err_struct_info", path);
524		if (rd(fn, &err_struct_info)<0) {
525			perror("err_struct_info reading error.\n");
526			return -1;
527		}
528		printf("err_struct_info=%lx\n", err_struct_info);
529	
530		sprintf(fn, "%s/err_data_buffer", path);
531		if (rd(fn, &err_data_buffer)<0) {
532			perror("err_data_buffer reading error.\n");
533			return -1;
534		}
535		printf("err_data_buffer=%lx\n", err_data_buffer);
536	
537		sprintf(fn, "%s/status", path);
538		if (rd("status", (u64*)&status)<0) {
539			perror("status reading error.\n");
540			return -1;
541		}
542		printf("status=%d\n", status);
543	
544		sprintf(fn, "%s/capabilities", path);
545		if (rd(fn,&capabilities)<0) {
546			perror("capabilities reading error.\n");
547			return -1;
548		}
549		printf("capabilities=%lx\n", capabilities);
550	
551		sprintf(fn, "%s/resources", path);
552		if (rd(fn, &resources)<0) {
553			perror("resources reading error.\n");
554			return -1;
555		}
556		printf("resources=%lx\n", resources);
557	
558		return 0;
559	}
560	
561	int query_capabilities(char *path, err_type_info_t err_type_info,
562				u64 *capabilities)
563	{
564		char fn[MAX_FN_SIZE];
565		err_struct_info_t err_struct_info;
566		err_data_buffer_t err_data_buffer;
567	
568		err_struct_info.err_struct_info=0;
569		memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
570	
571		sprintf(fn, "%s/err_type_info", path);
572		wr(fn, err_type_info.err_type_info);
573		sprintf(fn, "%s/err_struct_info", path);
574		wr(fn, 0x0);
575		sprintf(fn, "%s/err_data_buffer", path);
576		wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
577	
578		// Fire pal_mc_error_inject procedure.
579		sprintf(fn, "%s/call_start", path);
580		wr(fn, mode);
581	
582		if (rd_capabilities(path, capabilities)<0)
583			return -1;
584	
585		return 0;
586	}
587	
588	int query_all_capabilities()
589	{
590		int status;
591		err_type_info_t err_type_info;
592		int err_sev, err_struct, struct_hier;
593		int cap=0;
594		u64 capabilities;
595		char path[MAX_FN_SIZE];
596	
597		err_type_info.err_type_info=0;			// Initial
598		err_type_info.err_type_info_u.mode=0;		// Query mode;
599		err_type_info.err_type_info_u.err_inj=0;
600	
601		printf("All capabilities implemented in pal_mc_error_inject:\n");
602		sprintf(path, PATH_FORMAT ,0);
603		for (err_sev=0;err_sev<3;err_sev++)
604			for (err_struct=0;err_struct<5;err_struct++)
605				for (struct_hier=0;struct_hier<5;struct_hier++)
606		{
607			status=-1;
608			capabilities=0;
609			err_type_info.err_type_info_u.err_sev=err_sev;
610			err_type_info.err_type_info_u.err_struct=err_struct;
611			err_type_info.err_type_info_u.struct_hier=struct_hier;
612	
613			if (query_capabilities(path, err_type_info, &capabilities)<0)
614				continue;
615	
616			if (rd_status(path, &status)<0)
617				continue;
618	
619			if (status==0) {
620				cap=1;
621				printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
622					err_sev, err_struct, struct_hier);
623				printf("capabilities 0x%lx\n", capabilities);
624			}
625		}
626		if (!cap) {
627			printf("No capabilities supported.\n");
628			return 0;
629		}
630	
631		return 0;
632	}
633	
634	int err_inject(int cpu, char *path, err_type_info_t err_type_info,
635			err_struct_info_t err_struct_info,
636			err_data_buffer_t err_data_buffer)
637	{
638		int status;
639		char fn[MAX_FN_SIZE];
640	
641		log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
642			err_type_info.err_type_info,
643			err_struct_info.err_struct_info);
644		log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
645			err_data_buffer.err_data_buffer[0],
646			err_data_buffer.err_data_buffer[1],
647			err_data_buffer.err_data_buffer[2]);
648		sprintf(fn, "%s/err_type_info", path);
649		wr(fn, err_type_info.err_type_info);
650		sprintf(fn, "%s/err_struct_info", path);
651		wr(fn, err_struct_info.err_struct_info);
652		sprintf(fn, "%s/err_data_buffer", path);
653		wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
654	
655		// Fire pal_mc_error_inject procedure.
656		sprintf(fn, "%s/call_start", path);
657		wr(fn,mode);
658	
659		if (rd_status(path, &status)<0) {
660			vbprintf("fail: read status\n");
661			return -100;
662		}
663	
664		if (status!=0) {
665			log_info(cpu, "fail: status=%d\n", status);
666			return status;
667		}
668	
669		return status;
670	}
671	
672	static int construct_data_buf(char *path, err_type_info_t err_type_info,
673			err_struct_info_t err_struct_info,
674			err_data_buffer_t *err_data_buffer,
675			void *va1)
676	{
677		char fn[MAX_FN_SIZE];
678		u64 virt_addr=0, phys_addr=0;
679	
680		vbprintf("va1=%lx\n", (u64)va1);
681		memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
682	
683		switch (err_type_info.err_type_info_u.err_struct) {
684			case 1: // Cache
685				switch (err_struct_info.err_struct_info_cache.cl_id) {
686					case 1: //Virtual addr
687						err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
688						break;
689					case 2: //Phys addr
690						sprintf(fn, "%s/virtual_to_phys", path);
691						virt_addr=(u64)va1;
692						if (wr(fn,virt_addr)<0)
693							return -1;
694						rd(fn, &phys_addr);
695						err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
696						break;
697					default:
698						printf("Not supported cl_id\n");
699						break;
700				}
701				break;
702			case 2: //  TLB
703				break;
704			case 3: //  Register file
705				break;
706			case 4: //  Bus/system interconnect
707			default:
708				printf("Not supported err_struct\n");
709				break;
710		}
711	
712		return 0;
713	}
714	
715	typedef struct {
716		u64 cpu;
717		u64 loop;
718		u64 interval;
719		u64 err_type_info;
720		u64 err_struct_info;
721		u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
722	} parameters_t;
723	
724	parameters_t line_para;
725	int para;
726	
727	static int empty_data_buffer(u64 *err_data_buffer)
728	{
729		int empty=1;
730		int i;
731	
732		for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
733		   if (err_data_buffer[i]!=-1)
734			empty=0;
735	
736		return empty;
737	}
738	
739	int err_inj()
740	{
741		err_type_info_t err_type_info;
742		err_struct_info_t err_struct_info;
743		err_data_buffer_t err_data_buffer;
744		int count;
745		FILE *fp;
746		unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
747		u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
748		int num;
749		int i;
750		char path[MAX_FN_SIZE];
751		parameters_t parameters[MAX_TASK_NUM]={};
752		pid_t child_pid[MAX_TASK_NUM];
753		time_t current_time;
754		int status;
755	
756		if (!para) {
757		    fp=fopen("err.conf", "r");
758		    if (fp==NULL) {
759			perror("Error open err.conf");
760			return -1;
761		    }
762	
763		    num=0;
764		    while (!feof(fp)) {
765			char buf[256];
766			memset(buf,0,256);
767			fgets(buf, 256, fp);
768			count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
769					&cpu, &loop, &interval,&err_type_info_conf,
770					&err_struct_info_conf,
771					&err_data_buffer_conf[0],
772					&err_data_buffer_conf[1],
773					&err_data_buffer_conf[2]);
774			if (count!=PARA_FIELD_NUM+3) {
775				err_data_buffer_conf[0]=-1;
776				err_data_buffer_conf[1]=-1;
777				err_data_buffer_conf[2]=-1;
778				count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
779					&cpu, &loop, &interval,&err_type_info_conf,
780					&err_struct_info_conf);
781				if (count!=PARA_FIELD_NUM)
782					continue;
783			}
784	
785			parameters[num].cpu=cpu;
786			parameters[num].loop=loop;
787			parameters[num].interval= interval>MIN_INTERVAL
788						  ?interval:MIN_INTERVAL;
789			parameters[num].err_type_info=err_type_info_conf;
790			parameters[num].err_struct_info=err_struct_info_conf;
791			memcpy(parameters[num++].err_data_buffer,
792				err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
793	
794			if (num>=MAX_TASK_NUM)
795				break;
796		    }
797		}
798		else {
799			parameters[0].cpu=line_para.cpu;
800			parameters[0].loop=line_para.loop;
801			parameters[0].interval= line_para.interval>MIN_INTERVAL
802						  ?line_para.interval:MIN_INTERVAL;
803			parameters[0].err_type_info=line_para.err_type_info;
804			parameters[0].err_struct_info=line_para.err_struct_info;
805			memcpy(parameters[0].err_data_buffer,
806				line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
807	
808			num=1;
809		}
810	
811		/* Create semaphore: If one_lock, one semaphore for all processors.
812		   Otherwise, one semaphore for each processor. */
813		if (one_lock) {
814			if (create_sem(0)) {
815				printf("Can not create semaphore...exit\n");
816				free_sem(0);
817				return -1;
818			}
819		}
820		else {
821			for (i=0;i<num;i++) {
822			   if (create_sem(parameters[i].cpu)) {
823				printf("Can not create semaphore for cpu%d...exit\n",i);
824				free_sem(parameters[num].cpu);
825				return -1;
826			   }
827			}
828		}
829	
830		/* Create a shm segment which will be used to inject/consume errors on.*/
831		if (create_shm()==-1) {
832			printf("Error to create shm...exit\n");
833			return -1;
834		}
835	
836		for (i=0;i<num;i++) {
837			pid_t pid;
838	
839			current_time=time(NULL);
840			log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
841			log_info(parameters[i].cpu, "Configurations:\n");
842			log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
843				parameters[i].cpu,
844				parameters[i].loop,
845				parameters[i].interval);
846			log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
847				parameters[i].err_type_info,
848				parameters[i].err_struct_info);
849	
850			sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
851			err_type_info.err_type_info=parameters[i].err_type_info;
852			err_struct_info.err_struct_info=parameters[i].err_struct_info;
853			memcpy(err_data_buffer.err_data_buffer,
854				parameters[i].err_data_buffer,
855				ERR_DATA_BUFFER_SIZE*8);
856	
857			pid=fork();
858			if (pid==0) {
859				unsigned long mask[MASK_SIZE];
860				int j, k;
861	
862				void *va1, *va2;
863	
864				/* Allocate two memory areas va1 and va2 in shm */
865				va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
866				va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
867	
868				vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
869				memset(va1, 0x1, PAGE_SIZE);
870				memset(va2, 0x2, PAGE_SIZE);
871	
872				if (empty_data_buffer(err_data_buffer.err_data_buffer))
873					/* If not specified yet, construct data buffer
874					 * with va1
875					 */
876					construct_data_buf(path, err_type_info,
877						err_struct_info, &err_data_buffer,va1);
878	
879				for (j=0;j<MASK_SIZE;j++)
880					mask[j]=0;
881	
882				cpu=parameters[i].cpu;
883				k = cpu%64;
884				j = cpu/64;
885				mask[j] = 1UL << k;
886	
887				if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
888					perror("Error sched_setaffinity:");
889					return -1;
890				}
891	
892				for (j=0; j<parameters[i].loop; j++) {
893					log_info(parameters[i].cpu,"Injection ");
894					log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
895	
896						parameters[i].cpu,j+1, parameters[i].loop);
897	
898					/* Hold the lock */
899					if (one_lock)
900						lock(0);
901					else
902					/* Hold lock on this cpu */
903						lock(parameters[i].cpu);
904	
905					if ((status=err_inject(parameters[i].cpu,
906						   path, err_type_info,
907						   err_struct_info, err_data_buffer))
908						   ==0) {
909						/* consume the error for "inject only"*/
910						memcpy(va2, va1, PAGE_SIZE);
911						memcpy(va1, va2, PAGE_SIZE);
912						log_info(parameters[i].cpu,
913							"successful\n");
914					}
915					else {
916						log_info(parameters[i].cpu,"fail:");
917						log_info(parameters[i].cpu,
918							"status=%d\n", status);
919						unlock(parameters[i].cpu);
920						break;
921					}
922					if (one_lock)
923					/* Release the lock */
924						unlock(0);
925					/* Release lock on this cpu */
926					else
927						unlock(parameters[i].cpu);
928	
929					if (j < parameters[i].loop-1)
930						sleep(parameters[i].interval);
931				}
932				current_time=time(NULL);
933				log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
934				return 0;
935			}
936			else if (pid<0) {
937				perror("Error fork:");
938				continue;
939			}
940			child_pid[i]=pid;
941		}
942		for (i=0;i<num;i++)
943			waitpid(child_pid[i], NULL, 0);
944	
945		if (one_lock)
946			free_sem(0);
947		else
948			for (i=0;i<num;i++)
949				free_sem(parameters[i].cpu);
950	
951		printf("All done.\n");
952	
953		return 0;
954	}
955	
/* Print the usage text of the tool to stdout. */
void help()
{
	static const char *const usage[] = {
		"err_inject_tool:\n",
		"\t-q: query all capabilities. default: off\n",
		"\t-m: procedure mode. 1: physical 2: virtual. default: 1\n",
		"\t-i: inject errors. default: off\n",
		"\t-l: one lock per cpu. default: one lock for all\n",
		"\t-e: error parameters:\n",
		"\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n",
		"\t\t   cpu: logical cpu number the error will be inject in.\n",
		"\t\t   loop: times the error will be injected.\n",
		"\t\t   interval: In second. every so often one error is injected.\n",
		"\t\t   err_type_info, err_struct_info: PAL parameters.\n",
		"\t\t   err_data_buffer: PAL parameter. Optional. If not present,\n",
		"\t\t                    it's constructed by tool automatically. Be\n",
		"\t\t                    careful to provide err_data_buffer and make\n",
		"\t\t                    sure it's working with the environment.\n",
		"\t    Note:no space between error parameters.\n",
		"\t    default: Take error parameters from err.conf instead of command line.\n",
		"\t-v: verbose. default: off\n",
		"\t-h: help\n\n",
		"The tool will take err.conf file as ",
		"input to inject single or multiple errors ",
		"on one or multiple cpus in parallel.\n",
	};
	size_t line;

	/* The text contains no conversions, so it is emitted verbatim. */
	for (line = 0; line < sizeof(usage) / sizeof(usage[0]); line++)
		fputs(usage[line], stdout);
}
981	
982	int main(int argc, char **argv)
983	{
984		char c;
985		int do_err_inj=0;
986		int do_query_all=0;
987		int count;
988		u32 m;
989	
990		/* Default one lock for all cpu's */
991		one_lock=1;
992		while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
993			switch (c) {
994				case 'm':	/* Procedure mode. 1: phys 2: virt */
995					count=sscanf(optarg, "%x", &m);
996					if (count!=1 || (m!=1 && m!=2)) {
997						printf("Wrong mode number.\n");
998						help();
999						return -1;
1000					}
1001					mode=m;
1002					break;
1003				case 'i':	/* Inject errors */
1004					do_err_inj=1;
1005					break;
1006				case 'q':	/* Query */
1007					do_query_all=1;
1008					break;
1009				case 'v':	/* Verbose */
1010					verbose=1;
1011					break;
1012				case 'l':	/* One lock per cpu */
1013					one_lock=0;
1014					break;
1015				case 'e':	/* error arguments */
1016					/* Take parameters:
1017					 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
1018					 * err_data_buffer is optional. Recommend not to specify
1019					 * err_data_buffer. Better to use tool to generate it.
1020					 */
1021					count=sscanf(optarg,
1022						"%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
1023						&line_para.cpu,
1024						&line_para.loop,
1025						&line_para.interval,
1026						&line_para.err_type_info,
1027						&line_para.err_struct_info,
1028						&line_para.err_data_buffer[0],
1029						&line_para.err_data_buffer[1],
1030						&line_para.err_data_buffer[2]);
1031					if (count!=PARA_FIELD_NUM+3) {
1032					    line_para.err_data_buffer[0]=-1,
1033					    line_para.err_data_buffer[1]=-1,
1034				 	    line_para.err_data_buffer[2]=-1;
1035					    count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
1036							&line_para.cpu,
1037							&line_para.loop,
1038							&line_para.interval,
1039							&line_para.err_type_info,
1040							&line_para.err_struct_info);
1041					    if (count!=PARA_FIELD_NUM) {
1042						printf("Wrong error arguments.\n");
1043						help();
1044						return -1;
1045					    }
1046					}
1047					para=1;
1048					break;
1049				continue;
1050					break;
1051				case 'h':
1052					help();
1053					return 0;
1054				default:
1055					break;
1056			}
1057	
1058		if (do_query_all)
1059			query_all_capabilities();
1060		if (do_err_inj)
1061			err_inj();
1062	
1063		if (!do_query_all &&  !do_err_inj)
1064			help();
1065	
1066		return 0;
1067	}
Hide Line Numbers


About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog