About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Documentation / filesystems / Locking




Custom Search

Based on kernel version 4.7.2. Page generated on 2016-08-22 22:45 EST.

1		The text below describes the locking rules for VFS-related methods.
2	It is (believed to be) up-to-date. *Please*, if you change anything in
3	prototypes or locking protocols - update this file. And update the relevant
4	instances in the tree, don't leave that to maintainers of filesystems/devices/
5	etc. At the very least, put the list of dubious cases in the end of this file.
6	Don't turn it into a log - maintainers of out-of-the-tree code are supposed to
7	be able to use diff(1).
8		Thing currently missing here: socket operations. Alexey?
9	
10	--------------------------- dentry_operations --------------------------
11	prototypes:
12		int (*d_revalidate)(struct dentry *, unsigned int);
13		int (*d_weak_revalidate)(struct dentry *, unsigned int);
14		int (*d_hash)(const struct dentry *, struct qstr *);
15		int (*d_compare)(const struct dentry *, const struct dentry *,
16				unsigned int, const char *, const struct qstr *);
17		int (*d_delete)(struct dentry *);
18		void (*d_release)(struct dentry *);
19		void (*d_iput)(struct dentry *, struct inode *);
20		char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
21		struct vfsmount *(*d_automount)(struct path *path);
22		int (*d_manage)(struct dentry *, bool);
23	
24	locking rules:
25			rename_lock	->d_lock	may block	rcu-walk
26	d_revalidate:	no		no		yes (ref-walk)	maybe
27	d_weak_revalidate:no		no		yes	 	no
28	d_hash:		no		no		no		maybe
29	d_compare:	yes		no		no		maybe
30	d_delete:	no		yes		no		no
31	d_release:	no		no		yes		no
32	d_prune:	no		yes		no		no
33	d_iput:		no		no		yes		no
34	d_dname:	no		no		no		no
35	d_automount:	no		no		yes		no
36	d_manage:	no		no		yes (ref-walk)	maybe
37	
38	--------------------------- inode_operations --------------------------- 
39	prototypes:
40		int (*create) (struct inode *,struct dentry *,umode_t, bool);
41		struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
42		int (*link) (struct dentry *,struct inode *,struct dentry *);
43		int (*unlink) (struct inode *,struct dentry *);
44		int (*symlink) (struct inode *,struct dentry *,const char *);
45		int (*mkdir) (struct inode *,struct dentry *,umode_t);
46		int (*rmdir) (struct inode *,struct dentry *);
47		int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
48		int (*rename) (struct inode *, struct dentry *,
49				struct inode *, struct dentry *);
50		int (*rename2) (struct inode *, struct dentry *,
51				struct inode *, struct dentry *, unsigned int);
52		int (*readlink) (struct dentry *, char __user *,int);
53		const char *(*get_link) (struct dentry *, struct inode *, void **);
54		void (*truncate) (struct inode *);
55		int (*permission) (struct inode *, int, unsigned int);
56		struct posix_acl *(*get_acl)(struct inode *, int);
57		int (*setattr) (struct dentry *, struct iattr *);
58		int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
59		int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
60		ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
61		ssize_t (*listxattr) (struct dentry *, char *, size_t);
62		int (*removexattr) (struct dentry *, const char *);
63		int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
64		void (*update_time)(struct inode *, struct timespec *, int);
65		int (*atomic_open)(struct inode *, struct dentry *,
66					struct file *, unsigned open_flag,
67					umode_t create_mode, int *opened);
68		int (*tmpfile) (struct inode *, struct dentry *, umode_t);
69		int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
70	
71	locking rules:
72		all may block
73			i_mutex(inode)
74	lookup:		yes
75	create:		yes
76	link:		yes (both)
77	mknod:		yes
78	symlink:	yes
79	mkdir:		yes
80	unlink:		yes (both)
81	rmdir:		yes (both)	(see below)
82	rename:		yes (all)	(see below)
83	rename2:	yes (all)	(see below)
84	readlink:	no
85	get_link:	no
86	setattr:	yes
87	permission:	no (may not block if called in rcu-walk mode)
88	get_acl:	no
89	getattr:	no
90	setxattr:	yes
91	getxattr:	no
92	listxattr:	no
93	removexattr:	yes
94	fiemap:		no
95	update_time:	no
96	atomic_open:	yes
97	tmpfile:	no
98	dentry_open:	no
99	
100		Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
101	victim.
102		cross-directory ->rename() and ->rename2() have (per-superblock)
103	->s_vfs_rename_sem.
104	
105	See Documentation/filesystems/directory-locking for more detailed discussion
106	of the locking scheme for directory operations.
107	
108	--------------------------- super_operations ---------------------------
109	prototypes:
110		struct inode *(*alloc_inode)(struct super_block *sb);
111		void (*destroy_inode)(struct inode *);
112		void (*dirty_inode) (struct inode *, int flags);
113		int (*write_inode) (struct inode *, struct writeback_control *wbc);
114		int (*drop_inode) (struct inode *);
115		void (*evict_inode) (struct inode *);
116		void (*put_super) (struct super_block *);
117		int (*sync_fs)(struct super_block *sb, int wait);
118		int (*freeze_fs) (struct super_block *);
119		int (*unfreeze_fs) (struct super_block *);
120		int (*statfs) (struct dentry *, struct kstatfs *);
121		int (*remount_fs) (struct super_block *, int *, char *);
122		void (*umount_begin) (struct super_block *);
123		int (*show_options)(struct seq_file *, struct dentry *);
124		ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
125		ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
126		int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
127	
128	locking rules:
129		All may block [not true, see below]
130				s_umount
131	alloc_inode:
132	destroy_inode:
133	dirty_inode:
134	write_inode:
135	drop_inode:				!!!inode->i_lock!!!
136	evict_inode:
137	put_super:		write
138	sync_fs:		read
139	freeze_fs:		write
140	unfreeze_fs:		write
141	statfs:			maybe(read)	(see below)
142	remount_fs:		write
143	umount_begin:		no
144	show_options:		no		(namespace_sem)
145	quota_read:		no		(see below)
146	quota_write:		no		(see below)
147	bdev_try_to_free_page:	no		(see below)
148	
149	->statfs() has s_umount (shared) when called by ustat(2) (native or
150	compat), but that's an accident of bad API; s_umount is used to pin
151	the superblock down when we only have dev_t given us by userland to
152	identify the superblock.  Everything else (statfs(), fstatfs(), etc.)
153	doesn't hold it when calling ->statfs() - superblock is pinned down
154	by resolving the pathname passed to syscall.
155	->quota_read() and ->quota_write() functions are both guaranteed to
156	be the only ones operating on the quota file by the quota code (via
157	dqio_sem) (unless an admin really wants to screw up something and
158	writes to quota files with quotas on). For other details about locking
159	see also dquot_operations section.
160	->bdev_try_to_free_page is called from the ->releasepage handler of
161	the block device inode.  See there for more details.
162	
163	--------------------------- file_system_type ---------------------------
164	prototypes:
165		struct dentry *(*mount) (struct file_system_type *, int,
166			       const char *, void *);
167		void (*kill_sb) (struct super_block *);
168	locking rules:
169			may block
170	mount		yes
171	kill_sb		yes
172	
173	->mount() returns ERR_PTR or the root dentry; its superblock should be locked
174	on return.
175	->kill_sb() takes a write-locked superblock, does all shutdown work on it,
176	unlocks and drops the reference.
177	
178	--------------------------- address_space_operations --------------------------
179	prototypes:
180		int (*writepage)(struct page *page, struct writeback_control *wbc);
181		int (*readpage)(struct file *, struct page *);
182		int (*sync_page)(struct page *);
183		int (*writepages)(struct address_space *, struct writeback_control *);
184		int (*set_page_dirty)(struct page *page);
185		int (*readpages)(struct file *filp, struct address_space *mapping,
186				struct list_head *pages, unsigned nr_pages);
187		int (*write_begin)(struct file *, struct address_space *mapping,
188					loff_t pos, unsigned len, unsigned flags,
189					struct page **pagep, void **fsdata);
190		int (*write_end)(struct file *, struct address_space *mapping,
191					loff_t pos, unsigned len, unsigned copied,
192					struct page *page, void *fsdata);
193		sector_t (*bmap)(struct address_space *, sector_t);
194		void (*invalidatepage) (struct page *, unsigned int, unsigned int);
195		int (*releasepage) (struct page *, int);
196		void (*freepage)(struct page *);
197		int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
198		int (*migratepage)(struct address_space *, struct page *, struct page *);
199		int (*launder_page)(struct page *);
200		int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
201		int (*error_remove_page)(struct address_space *, struct page *);
202		int (*swap_activate)(struct file *);
203		int (*swap_deactivate)(struct file *);
204	
205	locking rules:
206		All except set_page_dirty and freepage may block
207	
208				PageLocked(page)	i_mutex
209	writepage:		yes, unlocks (see below)
210	readpage:		yes, unlocks
211	sync_page:		maybe
212	writepages:
213	set_page_dirty:		no
214	readpages:
215	write_begin:		locks the page		yes
216	write_end:		yes, unlocks		yes
217	bmap:
218	invalidatepage:		yes
219	releasepage:		yes
220	freepage:		yes
221	direct_IO:
222	migratepage:		yes (both)
223	launder_page:		yes
224	is_partially_uptodate:	yes
225	error_remove_page:	yes
226	swap_activate:		no
227	swap_deactivate:	no
228	
229		->write_begin(), ->write_end(), ->sync_page() and ->readpage()
230	may be called from the request handler (/dev/loop).
231	
232		->readpage() unlocks the page, either synchronously or via I/O
233	completion.
234	
235		->readpages() populates the pagecache with the passed pages and starts
236	I/O against them.  They come unlocked upon I/O completion.
237	
238		->writepage() is used for two purposes: for "memory cleansing" and for
239	"sync".  These are quite different operations and the behaviour may differ
240	depending upon the mode.
241	
242	If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
243	it *must* start I/O against the page, even if that would involve
244	blocking on in-progress I/O.
245	
246	If writepage is called for memory cleansing (sync_mode ==
247	WBC_SYNC_NONE) then its role is to get as much writeout underway as
248	possible.  So writepage should try to avoid blocking against
249	currently-in-progress I/O.
250	
251	If the filesystem is not called for "sync" and it determines that it
252	would need to block against in-progress I/O to be able to start new I/O
253	against the page the filesystem should redirty the page with
254	redirty_page_for_writepage(), then unlock the page and return zero.
255	This may also be done to avoid internal deadlocks, but rarely.
256	
257	If the filesystem is called for sync then it must wait on any
258	in-progress I/O and then start new I/O.
259	
260	The filesystem should unlock the page synchronously, before returning to the
261	caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
262	value. WRITEPAGE_ACTIVATE means that page cannot really be written out
263	currently, and VM should stop calling ->writepage() on this page for some
264	time. VM does this by moving page to the head of the active list, hence the
265	name.
266	
267	Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
268	and return zero, writepage *must* run set_page_writeback() against the page,
269	followed by unlocking it.  Once set_page_writeback() has been run against the
270	page, write I/O can be submitted and the write I/O completion handler must run
271	end_page_writeback() once the I/O is complete.  If no I/O is submitted, the
272	filesystem must run end_page_writeback() against the page before returning from
273	writepage.
274	
275	That is: after 2.5.12, pages which are under writeout are *not* locked.  Note,
276	if the filesystem needs the page to be locked during writeout, that is ok, too,
277	the page is allowed to be unlocked at any point in time between the calls to
278	set_page_writeback() and end_page_writeback().
279	
280	Note, failure to run either redirty_page_for_writepage() or the combination of
281	set_page_writeback()/end_page_writeback() on a page submitted to writepage
282	will leave the page itself marked clean but it will be tagged as dirty in the
283	radix tree.  This incoherency can lead to all sorts of hard-to-debug problems
284	in the filesystem like having dirty inodes at umount and losing written data.
285	
286		->sync_page() locking rules are not well-defined - usually it is called
287	with lock on page, but that is not guaranteed. Considering the currently
288	existing instances of this method ->sync_page() itself doesn't look
289	well-defined...
290	
291		->writepages() is used for periodic writeback and for syscall-initiated
292	sync operations.  The address_space should start I/O against at least
293	*nr_to_write pages.  *nr_to_write must be decremented for each page which is
294	written.  The address_space implementation may write more (or less) pages
295	than *nr_to_write asks for, but it should try to be reasonably close.  If
296	nr_to_write is NULL, all dirty pages must be written.
297	
298	writepages should _only_ write pages which are present on
299	mapping->io_pages.
300	
301		->set_page_dirty() is called from various places in the kernel
302	when the target page is marked as needing writeback.  It may be called
303	under spinlock (it cannot block) and is sometimes called with the page
304	not locked.
305	
306		->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
307	filesystems and by the swapper. The latter will eventually go away.  Please,
308	keep it that way and don't breed new callers.
309	
310		->invalidatepage() is called when the filesystem must attempt to drop
311	some or all of the buffers from the page when it is being truncated. It
312	returns zero on success. If ->invalidatepage is NULL, the kernel uses
313	block_invalidatepage() instead.
314	
315		->releasepage() is called when the kernel is about to try to drop the
316	buffers from the page in preparation for freeing it.  It returns zero to
317	indicate that the buffers are (or may be) freeable.  If ->releasepage is NULL,
318	the kernel assumes that the fs has no private interest in the buffers.
319	
320		->freepage() is called when the kernel is done dropping the page
321	from the page cache.
322	
323		->launder_page() may be called prior to releasing a page if
324	it is still found to be dirty. It returns zero if the page was successfully
325	cleaned, or an error value if not. Note that in order to prevent the page
326	getting mapped back in and redirtied, it needs to be kept locked
327	across the entire operation.
328	
329		->swap_activate will be called with a non-zero argument on
330	files backing (non block device backed) swapfiles. A return value
331	of zero indicates success, in which case this file can be used for
332	backing swapspace. The swapspace operations will be proxied to the
333	address space operations.
334	
335		->swap_deactivate() will be called in the sys_swapoff()
336	path after ->swap_activate() returned success.
337	
338	----------------------- file_lock_operations ------------------------------
339	prototypes:
340		void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
341		void (*fl_release_private)(struct file_lock *);
342	
343	
344	locking rules:
345				inode->i_lock	may block
346	fl_copy_lock:		yes		no
347	fl_release_private:	maybe		maybe[1]
348	
349	[1]:	->fl_release_private for flock or POSIX locks is currently allowed
350	to block. Leases however can still be freed while the i_lock is held and
351	so fl_release_private called on a lease should not block.
352	
353	----------------------- lock_manager_operations ---------------------------
354	prototypes:
355		int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
356		unsigned long (*lm_owner_key)(struct file_lock *);
357		void (*lm_notify)(struct file_lock *);  /* unblock callback */
358		int (*lm_grant)(struct file_lock *, struct file_lock *, int);
359		void (*lm_break)(struct file_lock *); /* break_lease callback */
360		int (*lm_change)(struct file_lock **, int);
361	
362	locking rules:
363	
364				inode->i_lock	blocked_lock_lock	may block
365	lm_compare_owner:	yes[1]		maybe			no
366	lm_owner_key:		yes[1]		yes			no
367	lm_notify:		yes		yes			no
368	lm_grant:		no		no			no
369	lm_break:		yes		no			no
370	lm_change:		yes		no			no
371	
372	[1]:	->lm_compare_owner and ->lm_owner_key are generally called with
373	*an* inode->i_lock held. It may not be the i_lock of the inode
374	associated with either file_lock argument! This is the case with deadlock
375	detection, since the code has to chase down the owners of locks that may
376	be entirely unrelated to the one on which the lock is being acquired.
377	For deadlock detection however, the blocked_lock_lock is also held. The
378	fact that these locks are held ensures that the file_locks do not
379	disappear out from under you while doing the comparison or generating an
380	owner key.
381	
382	--------------------------- buffer_head -----------------------------------
383	prototypes:
384		void (*b_end_io)(struct buffer_head *bh, int uptodate);
385	
386	locking rules:
387		called from interrupts. In other words, extreme care is needed here.
388	bh is locked, but that's all the guarantees we have here. Currently only RAID1,
389	highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
390	call this method upon the IO completion.
391	
392	--------------------------- block_device_operations -----------------------
393	prototypes:
394		int (*open) (struct block_device *, fmode_t);
395		int (*release) (struct gendisk *, fmode_t);
396		int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
397		int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
398		int (*direct_access) (struct block_device *, sector_t, void __pmem **,
399					unsigned long *);
400		int (*media_changed) (struct gendisk *);
401		void (*unlock_native_capacity) (struct gendisk *);
402		int (*revalidate_disk) (struct gendisk *);
403		int (*getgeo)(struct block_device *, struct hd_geometry *);
404		void (*swap_slot_free_notify) (struct block_device *, unsigned long);
405	
406	locking rules:
407				bd_mutex
408	open:			yes
409	release:		yes
410	ioctl:			no
411	compat_ioctl:		no
412	direct_access:		no
413	media_changed:		no
414	unlock_native_capacity:	no
415	revalidate_disk:	no
416	getgeo:			no
417	swap_slot_free_notify:	no	(see below)
418	
419	media_changed, unlock_native_capacity and revalidate_disk are called only from
420	check_disk_change().
421	
422	swap_slot_free_notify is called with swap_lock and sometimes the page lock
423	held.
424	
425	
426	--------------------------- file_operations -------------------------------
427	prototypes:
428		loff_t (*llseek) (struct file *, loff_t, int);
429		ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
430		ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
431		ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
432		ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
433		int (*iterate) (struct file *, struct dir_context *);
434		unsigned int (*poll) (struct file *, struct poll_table_struct *);
435		long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
436		long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
437		int (*mmap) (struct file *, struct vm_area_struct *);
438		int (*open) (struct inode *, struct file *);
439		int (*flush) (struct file *);
440		int (*release) (struct inode *, struct file *);
441		int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
442		int (*aio_fsync) (struct kiocb *, int datasync);
443		int (*fasync) (int, struct file *, int);
444		int (*lock) (struct file *, int, struct file_lock *);
445		ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
446				loff_t *);
447		ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
448				loff_t *);
449		ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
450				void __user *);
451		ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
452				loff_t *, int);
453		unsigned long (*get_unmapped_area)(struct file *, unsigned long,
454				unsigned long, unsigned long, unsigned long);
455		int (*check_flags)(int);
456		int (*flock) (struct file *, int, struct file_lock *);
457		ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
458				size_t, unsigned int);
459		ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
460				size_t, unsigned int);
461		int (*setlease)(struct file *, long, struct file_lock **, void **);
462		long (*fallocate)(struct file *, int, loff_t, loff_t);
463	
464	
465	locking rules:
466		All may block.
467	
468	->llseek() locking has moved from llseek to the individual llseek
469	implementations.  If your fs is not using generic_file_llseek, you
470	need to acquire and release the appropriate locks in your ->llseek().
471	For many filesystems, it is probably safe to acquire the inode
472	mutex or just to use i_size_read() instead.
473	Note: this does not protect the file->f_pos against concurrent modifications
474	since this is something the userspace has to take care of.
475	
476	->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
477	Most instances call fasync_helper(), which does that maintenance, so it's
478	not normally something one needs to worry about.  Return values > 0 will be
479	mapped to zero in the VFS layer.
480	
481	->readdir() and ->ioctl() on directories must be changed. Ideally we would
482	move ->readdir() to inode_operations and use a separate method for directory
483	->ioctl() or kill the latter completely. One of the problems is that for
484	anything that resembles union-mount we won't have a struct file for all
485	components. And there are other reasons why the current interface is a mess...
486	
487	->read on directories probably must go away - we should just enforce -EISDIR
488	in sys_read() and friends.
489	
490	->setlease operations should call generic_setlease() before or after setting
491	the lease within the individual filesystem to record the result of the
492	operation.
493	
494	--------------------------- dquot_operations -------------------------------
495	prototypes:
496		int (*write_dquot) (struct dquot *);
497		int (*acquire_dquot) (struct dquot *);
498		int (*release_dquot) (struct dquot *);
499		int (*mark_dirty) (struct dquot *);
500		int (*write_info) (struct super_block *, int);
501	
502	These operations are intended to be more or less wrapping functions that ensure
503	a proper locking wrt the filesystem and call the generic quota operations.
504	
505	What filesystem should expect from the generic quota functions:
506	
507			FS recursion	Held locks when called
508	write_dquot:	yes		dqonoff_sem or dqptr_sem
509	acquire_dquot:	yes		dqonoff_sem or dqptr_sem
510	release_dquot:	yes		dqonoff_sem or dqptr_sem
511	mark_dirty:	no		-
512	write_info:	yes		dqonoff_sem
513	
514	FS recursion means calling ->quota_read() and ->quota_write() from superblock
515	operations.
516	
517	More details about quota locking can be found in fs/quota/dquot.c.
518	
519	--------------------------- vm_operations_struct -----------------------------
520	prototypes:
521		void (*open)(struct vm_area_struct*);
522		void (*close)(struct vm_area_struct*);
523		int (*fault)(struct vm_area_struct*, struct vm_fault *);
524		int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
525		int (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
526		int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
527	
528	locking rules:
529			mmap_sem	PageLocked(page)
530	open:		yes
531	close:		yes
532	fault:		yes		can return with page locked
533	map_pages:	yes
534	page_mkwrite:	yes		can return with page locked
535	pfn_mkwrite:	yes
536	access:		yes
537	
538		->fault() is called when a previously not present pte is about
539	to be faulted in. The filesystem must find and return the page associated
540	with the passed in "pgoff" in the vm_fault structure. If it is possible that
541	the page may be truncated and/or invalidated, then the filesystem must lock
542	the page, then ensure it is not already truncated (the page lock will block
543	subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
544	locked. The VM will unlock the page.
545	
546		->map_pages() is called when the VM asks to map easily accessible pages.
547	Filesystem should find and map pages associated with offsets from "pgoff"
548	till "max_pgoff". ->map_pages() is called with page table locked and must
549	not block.  If it's not possible to reach a page without blocking,
550	filesystem should skip it. Filesystem should use do_set_pte() to set up
551	page table entry. Pointer to entry associated with offset "pgoff" is
552	passed in "pte" field in vm_fault structure. Pointers to entries for other
553	offsets should be calculated relative to "pte".
554	
555		->page_mkwrite() is called when a previously read-only pte is
556	about to become writeable. The filesystem again must ensure that there are
557	no truncate/invalidate races, and then return with the page locked. If
558	the page has been truncated, the filesystem should not look up a new page
559	like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
560	will cause the VM to retry the fault.
561	
562		->pfn_mkwrite() is the same as page_mkwrite but when the pte is
563	VM_PFNMAP or VM_MIXEDMAP with a page-less entry. The expected return is
564	VM_FAULT_NOPAGE or one of the VM_FAULT_ERROR types. The default behavior
565	after this call is to make the pte read-write, unless pfn_mkwrite returns
566	an error.
567	
568		->access() is called when get_user_pages() fails in
569	access_process_vm(), typically used to debug a process through
570	/proc/pid/mem or ptrace.  This function is needed only for
571	VM_IO | VM_PFNMAP VMAs.
572	
573	================================================================================
574				Dubious stuff
575	
576	(if you break something or notice that it is broken and do not fix it yourself
577	- at least put it here)
Hide Line Numbers
About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Information is copyright its respective author. All material is available from the Linux Kernel Source distributed under a GPL License. This page is provided as a free service by mjmwired.net.