About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Documentation / filesystems / Locking




Custom Search

Based on kernel version 3.19. Page generated on 2015-02-13 21:20 EST.

1		The text below describes the locking rules for VFS-related methods.
2	It is (believed to be) up-to-date. *Please*, if you change anything in
3	prototypes or locking protocols - update this file. And update the relevant
4	instances in the tree, don't leave that to maintainers of filesystems/devices/
5	etc. At the very least, put the list of dubious cases at the end of this file.
6	Don't turn it into a log - maintainers of out-of-the-tree code are supposed to
7	be able to use diff(1).
8		Thing currently missing here: socket operations. Alexey?
9	
10	--------------------------- dentry_operations --------------------------
11	prototypes:
12		int (*d_revalidate)(struct dentry *, unsigned int);
13		int (*d_weak_revalidate)(struct dentry *, unsigned int);
14		int (*d_hash)(const struct dentry *, struct qstr *);
15		int (*d_compare)(const struct dentry *, const struct dentry *,
16				unsigned int, const char *, const struct qstr *);
17		int (*d_delete)(struct dentry *);
18		void (*d_release)(struct dentry *);
19		void (*d_iput)(struct dentry *, struct inode *);
20		char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
21		struct vfsmount *(*d_automount)(struct path *path);
22		int (*d_manage)(struct dentry *, bool);
23	
24	locking rules:
25			rename_lock	->d_lock	may block	rcu-walk
26	d_revalidate:	no		no		yes (ref-walk)	maybe
27	d_weak_revalidate:no		no		yes	 	no
28	d_hash		no		no		no		maybe
29	d_compare:	yes		no		no		maybe
30	d_delete:	no		yes		no		no
31	d_release:	no		no		yes		no
32	d_prune:        no              yes             no              no
33	d_iput:		no		no		yes		no
34	d_dname:	no		no		no		no
35	d_automount:	no		no		yes		no
36	d_manage:	no		no		yes (ref-walk)	maybe
37	
38	--------------------------- inode_operations --------------------------- 
39	prototypes:
40		int (*create) (struct inode *,struct dentry *,umode_t, bool);
41		struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
42		int (*link) (struct dentry *,struct inode *,struct dentry *);
43		int (*unlink) (struct inode *,struct dentry *);
44		int (*symlink) (struct inode *,struct dentry *,const char *);
45		int (*mkdir) (struct inode *,struct dentry *,umode_t);
46		int (*rmdir) (struct inode *,struct dentry *);
47		int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
48		int (*rename) (struct inode *, struct dentry *,
49				struct inode *, struct dentry *);
50		int (*rename2) (struct inode *, struct dentry *,
51				struct inode *, struct dentry *, unsigned int);
52		int (*readlink) (struct dentry *, char __user *,int);
53		void * (*follow_link) (struct dentry *, struct nameidata *);
54		void (*put_link) (struct dentry *, struct nameidata *, void *);
55		void (*truncate) (struct inode *);
56		int (*permission) (struct inode *, int, unsigned int);
57		int (*get_acl)(struct inode *, int);
58		int (*setattr) (struct dentry *, struct iattr *);
59		int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
60		int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
61		ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
62		ssize_t (*listxattr) (struct dentry *, char *, size_t);
63		int (*removexattr) (struct dentry *, const char *);
64		int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
65		void (*update_time)(struct inode *, struct timespec *, int);
66		int (*atomic_open)(struct inode *, struct dentry *,
67					struct file *, unsigned open_flag,
68					umode_t create_mode, int *opened);
69		int (*tmpfile) (struct inode *, struct dentry *, umode_t);
70		int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
71	
72	locking rules:
73		all may block
74			i_mutex(inode)
75	lookup:		yes
76	create:		yes
77	link:		yes (both)
78	mknod:		yes
79	symlink:	yes
80	mkdir:		yes
81	unlink:		yes (both)
82	rmdir:		yes (both)	(see below)
83	rename:		yes (all)	(see below)
84	rename2:	yes (all)	(see below)
85	readlink:	no
86	follow_link:	no
87	put_link:	no
88	setattr:	yes
89	permission:	no (may not block if called in rcu-walk mode)
90	get_acl:	no
91	getattr:	no
92	setxattr:	yes
93	getxattr:	no
94	listxattr:	no
95	removexattr:	yes
96	fiemap:		no
97	update_time:	no
98	atomic_open:	yes
99	tmpfile:	no
100	dentry_open:	no
101	
102		Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
103	victim.
104		cross-directory ->rename() and ->rename2() have (per-superblock)
105	->s_vfs_rename_sem.
106	
107	See Documentation/filesystems/directory-locking for more detailed discussion
108	of the locking scheme for directory operations.
109	
110	--------------------------- super_operations ---------------------------
111	prototypes:
112		struct inode *(*alloc_inode)(struct super_block *sb);
113		void (*destroy_inode)(struct inode *);
114		void (*dirty_inode) (struct inode *, int flags);
115		int (*write_inode) (struct inode *, struct writeback_control *wbc);
116		int (*drop_inode) (struct inode *);
117		void (*evict_inode) (struct inode *);
118		void (*put_super) (struct super_block *);
119		int (*sync_fs)(struct super_block *sb, int wait);
120		int (*freeze_fs) (struct super_block *);
121		int (*unfreeze_fs) (struct super_block *);
122		int (*statfs) (struct dentry *, struct kstatfs *);
123		int (*remount_fs) (struct super_block *, int *, char *);
124		void (*umount_begin) (struct super_block *);
125		int (*show_options)(struct seq_file *, struct dentry *);
126		ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
127		ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
128		int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
129	
130	locking rules:
131		All may block [not true, see below]
132				s_umount
133	alloc_inode:
134	destroy_inode:
135	dirty_inode:
136	write_inode:
137	drop_inode:				!!!inode->i_lock!!!
138	evict_inode:
139	put_super:		write
140	sync_fs:		read
141	freeze_fs:		write
142	unfreeze_fs:		write
143	statfs:			maybe(read)	(see below)
144	remount_fs:		write
145	umount_begin:		no
146	show_options:		no		(namespace_sem)
147	quota_read:		no		(see below)
148	quota_write:		no		(see below)
149	bdev_try_to_free_page:	no		(see below)
150	
151	->statfs() has s_umount (shared) when called by ustat(2) (native or
152	compat), but that's an accident of bad API; s_umount is used to pin
153	the superblock down when we only have dev_t given to us by userland to
154	identify the superblock.  Everything else (statfs(), fstatfs(), etc.)
155	doesn't hold it when calling ->statfs() - superblock is pinned down
156	by resolving the pathname passed to syscall.
157	->quota_read() and ->quota_write() functions are both guaranteed to
158	be the only ones operating on the quota file by the quota code (via
159	dqio_sem) (unless an admin really wants to screw up something and
160	writes to quota files with quotas on). For other details about locking
161	see also dquot_operations section.
162	->bdev_try_to_free_page is called from the ->releasepage handler of
163	the block device inode.  See there for more details.
164	
165	--------------------------- file_system_type ---------------------------
166	prototypes:
167		int (*get_sb) (struct file_system_type *, int,
168			       const char *, void *, struct vfsmount *);
169		struct dentry *(*mount) (struct file_system_type *, int,
170			       const char *, void *);
171		void (*kill_sb) (struct super_block *);
172	locking rules:
173			may block
174	mount		yes
175	kill_sb		yes
176	
177	->mount() returns ERR_PTR or the root dentry; its superblock should be locked
178	on return.
179	->kill_sb() takes a write-locked superblock, does all shutdown work on it,
180	unlocks and drops the reference.
181	
182	--------------------------- address_space_operations --------------------------
183	prototypes:
184		int (*writepage)(struct page *page, struct writeback_control *wbc);
185		int (*readpage)(struct file *, struct page *);
186		int (*sync_page)(struct page *);
187		int (*writepages)(struct address_space *, struct writeback_control *);
188		int (*set_page_dirty)(struct page *page);
189		int (*readpages)(struct file *filp, struct address_space *mapping,
190				struct list_head *pages, unsigned nr_pages);
191		int (*write_begin)(struct file *, struct address_space *mapping,
192					loff_t pos, unsigned len, unsigned flags,
193					struct page **pagep, void **fsdata);
194		int (*write_end)(struct file *, struct address_space *mapping,
195					loff_t pos, unsigned len, unsigned copied,
196					struct page *page, void *fsdata);
197		sector_t (*bmap)(struct address_space *, sector_t);
198		void (*invalidatepage) (struct page *, unsigned int, unsigned int);
199		int (*releasepage) (struct page *, int);
200		void (*freepage)(struct page *);
201		int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
202		int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
203					unsigned long *);
204		int (*migratepage)(struct address_space *, struct page *, struct page *);
205		int (*launder_page)(struct page *);
206		int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
207		int (*error_remove_page)(struct address_space *, struct page *);
208		int (*swap_activate)(struct file *);
209		int (*swap_deactivate)(struct file *);
210	
211	locking rules:
212		All except set_page_dirty and freepage may block
213	
214				PageLocked(page)	i_mutex
215	writepage:		yes, unlocks (see below)
216	readpage:		yes, unlocks
217	sync_page:		maybe
218	writepages:
219	set_page_dirty		no
220	readpages:
221	write_begin:		locks the page		yes
222	write_end:		yes, unlocks		yes
223	bmap:
224	invalidatepage:		yes
225	releasepage:		yes
226	freepage:		yes
227	direct_IO:
228	get_xip_mem:					maybe
229	migratepage:		yes (both)
230	launder_page:		yes
231	is_partially_uptodate:	yes
232	error_remove_page:	yes
233	swap_activate:		no
234	swap_deactivate:	no
235	
236		->write_begin(), ->write_end(), ->sync_page() and ->readpage()
237	may be called from the request handler (/dev/loop).
238	
239		->readpage() unlocks the page, either synchronously or via I/O
240	completion.
241	
242		->readpages() populates the pagecache with the passed pages and starts
243	I/O against them.  They come unlocked upon I/O completion.
244	
245		->writepage() is used for two purposes: for "memory cleansing" and for
246	"sync".  These are quite different operations and the behaviour may differ
247	depending upon the mode.
248	
249	If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
250	it *must* start I/O against the page, even if that would involve
251	blocking on in-progress I/O.
252	
253	If writepage is called for memory cleansing (sync_mode ==
254	WBC_SYNC_NONE) then its role is to get as much writeout underway as
255	possible.  So writepage should try to avoid blocking against
256	currently-in-progress I/O.
257	
258	If the filesystem is not called for "sync" and it determines that it
259	would need to block against in-progress I/O to be able to start new I/O
260	against the page the filesystem should redirty the page with
261	redirty_page_for_writepage(), then unlock the page and return zero.
262	This may also be done to avoid internal deadlocks, but rarely.
263	
264	If the filesystem is called for sync then it must wait on any
265	in-progress I/O and then start new I/O.
266	
267	The filesystem should unlock the page synchronously, before returning to the
268	caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
269	value. WRITEPAGE_ACTIVATE means that page cannot really be written out
270	currently, and VM should stop calling ->writepage() on this page for some
271	time. VM does this by moving page to the head of the active list, hence the
272	name.
273	
274	Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
275	and return zero, writepage *must* run set_page_writeback() against the page,
276	followed by unlocking it.  Once set_page_writeback() has been run against the
277	page, write I/O can be submitted and the write I/O completion handler must run
278	end_page_writeback() once the I/O is complete.  If no I/O is submitted, the
279	filesystem must run end_page_writeback() against the page before returning from
280	writepage.
281	
282	That is: after 2.5.12, pages which are under writeout are *not* locked.  Note,
283	if the filesystem needs the page to be locked during writeout, that is ok, too;
284	the page is allowed to be unlocked at any point in time between the calls to
285	set_page_writeback() and end_page_writeback().
286	
287	Note, failure to run either redirty_page_for_writepage() or the combination of
288	set_page_writeback()/end_page_writeback() on a page submitted to writepage
289	will leave the page itself marked clean but it will be tagged as dirty in the
290	radix tree.  This incoherency can lead to all sorts of hard-to-debug problems
291	in the filesystem like having dirty inodes at umount and losing written data.
292	
293		->sync_page() locking rules are not well-defined - usually it is called
294	with lock on page, but that is not guaranteed. Considering the currently
295	existing instances of this method ->sync_page() itself doesn't look
296	well-defined...
297	
298		->writepages() is used for periodic writeback and for syscall-initiated
299	sync operations.  The address_space should start I/O against at least
300	*nr_to_write pages.  *nr_to_write must be decremented for each page which is
301	written.  The address_space implementation may write more (or fewer) pages
302	than *nr_to_write asks for, but it should try to be reasonably close.  If
303	nr_to_write is NULL, all dirty pages must be written.
304	
305	writepages should _only_ write pages which are present on
306	mapping->io_pages.
307	
308		->set_page_dirty() is called from various places in the kernel
309	when the target page is marked as needing writeback.  It may be called
310	under spinlock (it cannot block) and is sometimes called with the page
311	not locked.
312	
313		->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
314	filesystems and by the swapper. The latter will eventually go away.  Please,
315	keep it that way and don't breed new callers.
316	
317		->invalidatepage() is called when the filesystem must attempt to drop
318	some or all of the buffers from the page when it is being truncated. It
319	returns zero on success. If ->invalidatepage is zero, the kernel uses
320	block_invalidatepage() instead.
321	
322		->releasepage() is called when the kernel is about to try to drop the
323	buffers from the page in preparation for freeing it.  It returns zero to
324	indicate that the buffers are (or may be) freeable.  If ->releasepage is zero,
325	the kernel assumes that the fs has no private interest in the buffers.
326	
327		->freepage() is called when the kernel is done dropping the page
328	from the page cache.
329	
330		->launder_page() may be called prior to releasing a page if
331	it is still found to be dirty. It returns zero if the page was successfully
332	cleaned, or an error value if not. Note that in order to prevent the page
333	getting mapped back in and redirtied, it needs to be kept locked
334	across the entire operation.
335	
336		->swap_activate will be called with a non-zero argument on
337	files backing (non block device backed) swapfiles. A return value
338	of zero indicates success, in which case this file can be used for
339	backing swapspace. The swapspace operations will be proxied to the
340	address space operations.
341	
342		->swap_deactivate() will be called in the sys_swapoff()
343	path after ->swap_activate() returned success.
344	
345	----------------------- file_lock_operations ------------------------------
346	prototypes:
347		void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
348		void (*fl_release_private)(struct file_lock *);
349	
350	
351	locking rules:
352				inode->i_lock	may block
353	fl_copy_lock:		yes		no
354	fl_release_private:	maybe		maybe[1]
355	
356	[1]:	->fl_release_private for flock or POSIX locks is currently allowed
357	to block. Leases however can still be freed while the i_lock is held and
358	so fl_release_private called on a lease should not block.
359	
360	----------------------- lock_manager_operations ---------------------------
361	prototypes:
362		int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
363		unsigned long (*lm_owner_key)(struct file_lock *);
364		void (*lm_notify)(struct file_lock *);  /* unblock callback */
365		int (*lm_grant)(struct file_lock *, struct file_lock *, int);
366		void (*lm_break)(struct file_lock *); /* break_lease callback */
367		int (*lm_change)(struct file_lock **, int);
368	
369	locking rules:
370	
371				inode->i_lock	blocked_lock_lock	may block
372	lm_compare_owner:	yes[1]		maybe			no
373	lm_owner_key		yes[1]		yes			no
374	lm_notify:		yes		yes			no
375	lm_grant:		no		no			no
376	lm_break:		yes		no			no
377	lm_change		yes		no			no
378	
379	[1]:	->lm_compare_owner and ->lm_owner_key are generally called with
380	*an* inode->i_lock held. It may not be the i_lock of the inode
381	associated with either file_lock argument! This is the case with deadlock
382	detection, since the code has to chase down the owners of locks that may
383	be entirely unrelated to the one on which the lock is being acquired.
384	For deadlock detection however, the blocked_lock_lock is also held. The
385	fact that these locks are held ensures that the file_locks do not
386	disappear out from under you while doing the comparison or generating an
387	owner key.
388	
389	--------------------------- buffer_head -----------------------------------
390	prototypes:
391		void (*b_end_io)(struct buffer_head *bh, int uptodate);
392	
393	locking rules:
394		called from interrupts. In other words, extreme care is needed here.
395	bh is locked, but that's the only guarantee we have here. Currently only RAID1,
396	highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
397	call this method upon the IO completion.
398	
399	--------------------------- block_device_operations -----------------------
400	prototypes:
401		int (*open) (struct block_device *, fmode_t);
402		int (*release) (struct gendisk *, fmode_t);
403		int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
404		int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
405		int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
406		int (*media_changed) (struct gendisk *);
407		void (*unlock_native_capacity) (struct gendisk *);
408		int (*revalidate_disk) (struct gendisk *);
409		int (*getgeo)(struct block_device *, struct hd_geometry *);
410		void (*swap_slot_free_notify) (struct block_device *, unsigned long);
411	
412	locking rules:
413				bd_mutex
414	open:			yes
415	release:		yes
416	ioctl:			no
417	compat_ioctl:		no
418	direct_access:		no
419	media_changed:		no
420	unlock_native_capacity:	no
421	revalidate_disk:	no
422	getgeo:			no
423	swap_slot_free_notify:	no	(see below)
424	
425	media_changed, unlock_native_capacity and revalidate_disk are called only from
426	check_disk_change().
427	
428	swap_slot_free_notify is called with swap_lock and sometimes the page lock
429	held.
430	
431	
432	--------------------------- file_operations -------------------------------
433	prototypes:
434		loff_t (*llseek) (struct file *, loff_t, int);
435		ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
436		ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
437		ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
438		ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
439		ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
440		ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
441		int (*iterate) (struct file *, struct dir_context *);
442		unsigned int (*poll) (struct file *, struct poll_table_struct *);
443		long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
444		long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
445		int (*mmap) (struct file *, struct vm_area_struct *);
446		int (*open) (struct inode *, struct file *);
447		int (*flush) (struct file *);
448		int (*release) (struct inode *, struct file *);
449		int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
450		int (*aio_fsync) (struct kiocb *, int datasync);
451		int (*fasync) (int, struct file *, int);
452		int (*lock) (struct file *, int, struct file_lock *);
453		ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
454				loff_t *);
455		ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
456				loff_t *);
457		ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
458				void __user *);
459		ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
460				loff_t *, int);
461		unsigned long (*get_unmapped_area)(struct file *, unsigned long,
462				unsigned long, unsigned long, unsigned long);
463		int (*check_flags)(int);
464		int (*flock) (struct file *, int, struct file_lock *);
465		ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
466				size_t, unsigned int);
467		ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
468				size_t, unsigned int);
469		int (*setlease)(struct file *, long, struct file_lock **, void **);
470		long (*fallocate)(struct file *, int, loff_t, loff_t);
471	
472	
473	locking rules:
474		All may block.
475	
476	->llseek() locking has moved from llseek to the individual llseek
477	implementations.  If your fs is not using generic_file_llseek, you
478	need to acquire and release the appropriate locks in your ->llseek().
479	For many filesystems, it is probably safe to acquire the inode
480	mutex or just to use i_size_read() instead.
481	Note: this does not protect the file->f_pos against concurrent modifications
482	since this is something the userspace has to take care about.
483	
484	->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
485	Most instances call fasync_helper(), which does that maintenance, so it's
486	not normally something one needs to worry about.  Return values > 0 will be
487	mapped to zero in the VFS layer.
488	
489	->readdir() and ->ioctl() on directories must be changed. Ideally we would
490	move ->readdir() to inode_operations and use a separate method for directory
491	->ioctl() or kill the latter completely. One of the problems is that for
492	anything that resembles union-mount we won't have a struct file for all
493	components. And there are other reasons why the current interface is a mess...
494	
495	->read on directories probably must go away - we should just enforce -EISDIR
496	in sys_read() and friends.
497	
498	->setlease operations should call generic_setlease() before or after setting
499	the lease within the individual filesystem to record the result of the
500	operation.
501	
502	--------------------------- dquot_operations -------------------------------
503	prototypes:
504		int (*write_dquot) (struct dquot *);
505		int (*acquire_dquot) (struct dquot *);
506		int (*release_dquot) (struct dquot *);
507		int (*mark_dirty) (struct dquot *);
508		int (*write_info) (struct super_block *, int);
509	
510	These operations are intended to be more or less wrapping functions that ensure
511	a proper locking wrt the filesystem and call the generic quota operations.
512	
513	What filesystem should expect from the generic quota functions:
514	
515			FS recursion	Held locks when called
516	write_dquot:	yes		dqonoff_sem or dqptr_sem
517	acquire_dquot:	yes		dqonoff_sem or dqptr_sem
518	release_dquot:	yes		dqonoff_sem or dqptr_sem
519	mark_dirty:	no		-
520	write_info:	yes		dqonoff_sem
521	
522	FS recursion means calling ->quota_read() and ->quota_write() from superblock
523	operations.
524	
525	More details about quota locking can be found in fs/quota/dquot.c.
526	
527	--------------------------- vm_operations_struct -----------------------------
528	prototypes:
529		void (*open)(struct vm_area_struct*);
530		void (*close)(struct vm_area_struct*);
531		int (*fault)(struct vm_area_struct*, struct vm_fault *);
532		int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
533		int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
534	
535	locking rules:
536			mmap_sem	PageLocked(page)
537	open:		yes
538	close:		yes
539	fault:		yes		can return with page locked
540	map_pages:	yes
541	page_mkwrite:	yes		can return with page locked
542	access:		yes
543	
544		->fault() is called when a previously not present pte is about
545	to be faulted in. The filesystem must find and return the page associated
546	with the passed in "pgoff" in the vm_fault structure. If it is possible that
547	the page may be truncated and/or invalidated, then the filesystem must lock
548	the page, then ensure it is not already truncated (the page lock will block
549	subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
550	locked. The VM will unlock the page.
551	
552		->map_pages() is called when VM asks to map easily accessible pages.
553	Filesystem should find and map pages associated with offsets from "pgoff"
554	till "max_pgoff". ->map_pages() is called with page table locked and must
555	not block.  If it's not possible to reach a page without blocking,
556	filesystem should skip it. Filesystem should use do_set_pte() to set up
557	the page table entry. Pointer to entry associated with offset "pgoff" is
558	passed in "pte" field in vm_fault structure. Pointers to entries for other
559	offsets should be calculated relative to "pte".
560	
561		->page_mkwrite() is called when a previously read-only pte is
562	about to become writeable. The filesystem again must ensure that there are
563	no truncate/invalidate races, and then return with the page locked. If
564	the page has been truncated, the filesystem should not look up a new page
565	like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
566	will cause the VM to retry the fault.
567	
568		->access() is called when get_user_pages() fails in
569	access_process_vm(), typically used to debug a process through
570	/proc/pid/mem or ptrace.  This function is needed only for
571	VM_IO | VM_PFNMAP VMAs.
572	
573	================================================================================
574				Dubious stuff
575	
576	(if you break something or notice that it is broken and do not fix it yourself
577	- at least put it here)
Hide Line Numbers
About Kernel Documentation Linux Kernel Contact Linux Resources Linux Blog

Information is copyright its respective author. All material is available from the Linux Kernel Source distributed under a GPL License. This page is provided as a free service by mjmwired.net.