Linux Filesystem Study: The I/O Submission Process (7)


From page cache to bio to request

From both the read path and the write path we know that everything eventually funnels into readpages or writepages.

We will start the analysis from the read side. The write side is basically what 《Linux-块设备驱动之框架详细分析(详解)》 already covered: it ends up in ll_rw_block(). Exactly how it gets there we won't dig into here.

Let's look at the readpage flow.

 

static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned nr_pages)
{
	struct blk_plug plug;
	unsigned page_idx;
	int ret;

	blk_start_plug(&plug);

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the remaining pages */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = list_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping, page->index,
				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
			mapping->a_ops->readpage(filp, page);
		}
		page_cache_release(page);
	}
	ret = 0;

out:
	blk_finish_plug(&plug);

	return ret;
}

 

Assume the pages we want are not yet in the page cache, so they genuinely have to be read from the backing device through the filesystem's address_space_operations. read_pages() prefers the batched mapping->a_ops->readpages callback and only falls back to calling mapping->a_ops->readpage per page when readpages is not provided.

Let's assume the filesystem is ext4, which does provide readpages:

static const struct address_space_operations ext4_aops = {
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.writepages		= ext4_writepages,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_write_end,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= ext4_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};

static int
ext4_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;

	if (ext4_has_inline_data(inode))
		return 0;

	return ext4_mpage_readpages(mapping, pages, NULL, nr_pages);
}

 

It does almost nothing: however many pages were requested are simply handed on to ext4_mpage_readpages().

int ext4_mpage_readpages(struct address_space *mapping,
			 struct list_head *pages, struct page *page,
			 unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	sector_t last_block_in_bio = 0;

	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	struct block_device *bdev = inode->i_sb->s_bdev;
	int length;
	unsigned relative_block = 0;
	struct ext4_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
		int fully_mapped = 1;
		unsigned first_hole = blocks_per_page;

		prefetchw(&page->flags);
		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping, page->index,
				  mapping_gfp_constraint(mapping, GFP_KERNEL)))
				goto next_page;
		}

		if (page_has_buffers(page))
			goto confused;

		block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
		last_block = block_in_file + nr_pages * blocks_per_page;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;
		page_block = 0;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & EXT4_MAP_MAPPED) &&
		    block_in_file > map.m_lblk &&
		    block_in_file < (map.m_lblk + map.m_len)) {
			unsigned map_offset = block_in_file - map.m_lblk;
			unsigned last = map.m_len - map_offset;

			for (relative_block = 0; ; relative_block++) {
				if (relative_block == last) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				}
				if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk + map_offset +
					relative_block;
				page_block++;
				block_in_file++;
			}
		}

		/*
		 * Then do more ext4_map_blocks() calls until we are
		 * done with this page.
		 */
		while (page_block < blocks_per_page) {
			if (block_in_file < last_block) {
				map.m_lblk = block_in_file;
				map.m_len = last_block - block_in_file;

				if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
				set_error_page:
					SetPageError(page);
					zero_user_segment(page, 0,
							  PAGE_CACHE_SIZE);
					unlock_page(page);
					goto next_page;
				}
			}
			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
				fully_mapped = 0;
				if (first_hole == blocks_per_page)
					first_hole = page_block;
				page_block++;
				block_in_file++;
				continue;
			}
			if (first_hole != blocks_per_page)
				goto confused;		/* hole -> non-hole */

			/* Contiguous blocks? */
			if (page_block && blocks[page_block-1] != map.m_pblk-1)
				goto confused;
			for (relative_block = 0; ; relative_block++) {
				if (relative_block == map.m_len) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				} else if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk+relative_block;
				page_block++;
				block_in_file++;
			}
		}
		if (first_hole != blocks_per_page) {
			zero_user_segment(page, first_hole << blkbits,
					  PAGE_CACHE_SIZE);
			if (first_hole == 0) {
				SetPageUptodate(page);
				unlock_page(page);
				goto next_page;
			}
		} else if (fully_mapped) {
			SetPageMappedToDisk(page);
		}
		if (fully_mapped && blocks_per_page == 1 &&
		    !PageUptodate(page) && cleancache_get_page(page) == 0) {
			SetPageUptodate(page);
			goto confused;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != blocks[0] - 1)) {
		submit_and_realloc:
			ext4_submit_bio_read(bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct ext4_crypto_ctx *ctx = NULL;

			if (ext4_encrypted_inode(inode) &&
			    S_ISREG(inode->i_mode)) {
				ctx = ext4_get_crypto_ctx(inode);
				if (IS_ERR(ctx))
					goto set_error_page;
			}
			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, BIO_MAX_PAGES));
			if (!bio) {
				if (ctx)
					ext4_release_crypto_ctx(ctx);
				goto set_error_page;
			}
			bio->bi_bdev = bdev;
			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
			bio->bi_end_io = mpage_end_io;
			bio->bi_private = ctx;
		}

		length = first_hole << blkbits;
		if (bio_add_page(bio, page, length, 0) < length)
			goto submit_and_realloc;

		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
		     (relative_block == map.m_len)) ||
		    (first_hole != blocks_per_page)) {
			ext4_submit_bio_read(bio);
			bio = NULL;
		} else
			last_block_in_bio = blocks[blocks_per_page - 1];
		goto next_page;
	confused:
		if (bio) {
			ext4_submit_bio_read(bio);
			bio = NULL;
		}
		if (!PageUptodate(page))
			block_read_full_page(page, ext4_get_block);
		else
			unlock_page(page);
	next_page:
		if (pages)
			page_cache_release(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		ext4_submit_bio_read(bio);
	return 0;
}

 

To summarize ext4_mpage_readpages():

1. First, a few structures are initialized (the ext4_map_blocks map, the blocks[] array, and so on).
2. Then a for loop walks over every requested page.
3. A bio is allocated for the read.
4. The bio's fields are initialized (target device, starting sector, completion callback).
5. The page is associated with the bio via bio_add_page().
6. Finally the bio is submitted to actually fetch the data: ext4_submit_bio_read(). (A minimal sketch of steps 3-6 follows this list.)
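Below is a minimal sketch of steps 3-6, assuming the same 4.x-era bio API as the listings in this article. It is not the real ext4 code; it only shows the shape of building and submitting one read bio for a single, fully mapped page. The names read_one_page_sketch and sketch_end_io are invented for the illustration.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/pagemap.h>

/* Completion callback: runs after the driver has filled the page. */
static void sketch_end_io(struct bio *bio)
{
	struct page *page = bio->bi_io_vec[0].bv_page;

	if (!bio->bi_error)
		SetPageUptodate(page);	/* data has landed in the page-cache page */
	else
		SetPageError(page);
	unlock_page(page);
	bio_put(bio);
}

static int read_one_page_sketch(struct block_device *bdev, struct page *page,
				sector_t first_block, unsigned int blkbits)
{
	struct bio *bio;

	/* Step 3: allocate a bio with room for one bio_vec. */
	bio = bio_alloc(GFP_KERNEL, 1);
	if (!bio)
		return -ENOMEM;

	/* Step 4: initialize it -- target device, start sector, completion. */
	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = first_block << (blkbits - 9); /* fs block -> 512B sector */
	bio->bi_end_io = sketch_end_io;

	/* Step 5: associate the page with the bio. */
	bio_add_page(bio, page, PAGE_SIZE, 0);

	/* Step 6: hand it to the block layer. */
	submit_bio(READ, bio);
	return 0;
}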

Here we need to pay particular attention to the association step, bio_add_page():

int bio_add_page(struct bio *bio, struct page *page,
		 unsigned int len, unsigned int offset)
{
	struct bio_vec *bv;

	/*
	 * cloned bio must not modify vec list
	 */
	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return 0;
	if (bio->bi_vcnt > 0) {
		bv = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == bv->bv_page &&
		    offset == bv->bv_offset + bv->bv_len) {
			bv->bv_len += len;
			goto done;
		}
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	bv		= &bio->bi_io_vec[bio->bi_vcnt];
	bv->bv_page	= page;
	bv->bv_len	= len;
	bv->bv_offset	= offset;

	bio->bi_vcnt++;
done:
	bio->bi_iter.bi_size += len;
	return len;
}

 

The association is really nothing more than storing a pointer to the page. Looking back at the simple ramdisk driver from 《2.Linux 块设备驱动代码编写---简单驱动》:

	pRHdata = pdev->data + (bio->bi_sector * RAMHD_SECTOR_SIZE);
	bio_for_each_segment(bvec, bio, i) {
		pBuffer = kmap(bvec->bv_page) + bvec->bv_offset;
		switch (bio_data_dir(bio)) {
		case READ:
			memcpy(pBuffer, pRHdata, bvec->bv_len);
			flush_dcache_page(bvec->bv_page);
			break;
		case WRITE:
			memcpy(pRHdata, pBuffer, bvec->bv_len);
			break;
		}
		kunmap(bvec->bv_page);
		pRHdata += bvec->bv_len;
	}

 

We know that once we are down in the driver, it copies the data from the disk or block device into bv_page, and bv_page is exactly the page pointer we associated with the bio above.

The other flavor of driver, described in 《4.Linux-块设备驱动(详解)》, only sees requests instead of raw bios; it works the same way in the end, just with a different level of packaging.

As for the final step, submitting the bio to fetch the data via ext4_submit_bio_read(): that eventually lands in submit_bio(), which we analyze below.

One thing may look wrong at first glance: it seems as if a separate submit_bio() is issued per chunk of pages, and the bios are clearly not chained together here, even though bios can be linked into lists. Note that contiguous pages do share one bio through bio_add_page(); a new bio is only submitted when the on-disk blocks stop being contiguous or the bio is full. As you might guess, the remaining per-bio submissions get tied together later, during the submit_bio() path, where the block layer merges them into requests. (A small sketch of the plugging pattern involved follows.)
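Here is a minimal, hedged sketch of that plugging pattern, using only functions that already appear in this article (blk_start_plug, submit_bio, blk_finish_plug); read_pages() at the top wraps its loop in exactly this pair. submit_many_sketch() is an invented name, not kernel code.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Submit a batch of bios under one plug so the block layer can merge them. */
static void submit_many_sketch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);	/* merged into requests held on the plug */
	blk_finish_plug(&plug);			/* accumulated requests are dispatched here */
}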

submit_bio

static void
ext4_submit_bio_read(struct bio *bio)
{
	if (trace_android_fs_dataread_start_enabled()) {
		struct page *first_page = bio->bi_io_vec[0].bv_page;

		if (first_page != NULL) {
			trace_android_fs_dataread_start(
				first_page->mapping->host,
				page_offset(first_page),
				bio->bi_iter.bi_size,
				current->pid,
				current->comm);
		}
	}
	submit_bio(READ, bio);
}

 

Last time we got as far as ext4_submit_bio_read(); the next stop is submit_bio().

block/blk-core.c
blk_qc_t submit_bio(int rw, struct bio *bio)
{
	bio->bi_rw |= rw;
	if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(rw & REQ_WRITE_SAME))
			count = bdev_logical_block_size(bio->bi_bdev) >> 9;
		else
			count = bio_sectors(bio);

		if (rw & WRITE) {
			count_vm_events(PGPGOUT, count);
		} else {
			task_io_account_read(bio->bi_iter.bi_size);
			count_vm_events(PGPGIN, count);
		}

		mtk_btag_pidlog_submit_bio(bio);

		if (unlikely(block_dump)) {
			char b[BDEVNAME_SIZE];
			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
			current->comm, task_pid_nr(current),
				(rw & WRITE) ? "WRITE" : "READ",
				(unsigned long long)bio->bi_iter.bi_sector,
				bdevname(bio->bi_bdev, b),
				count);
		}
	}

	return generic_make_request(bio);
}

 

From here we go straight into generic_make_request():

blk_qc_t generic_make_request(struct bio *bio)
{
	struct bio_list bio_list_on_stack;
	blk_qc_t ret = BLK_QC_T_NONE;

	if (!generic_make_request_checks(bio))
		goto out;

	if (current->bio_list) {
		bio_list_add(current->bio_list, bio);
		goto out;
	}

	BUG_ON(bio->bi_next);
	bio_list_init(&bio_list_on_stack);
	current->bio_list = &bio_list_on_stack;
	do {
		struct request_queue *q = bdev_get_queue(bio->bi_bdev);

		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {

			ret = q->make_request_fn(q, bio);

			blk_queue_exit(q);

			bio = bio_list_pop(current->bio_list);
		} else {
			struct bio *bio_next = bio_list_pop(current->bio_list);

			bio_io_error(bio);
			bio = bio_next;
		}
	} while (bio);
	current->bio_list = NULL; /* deactivate */

out:
	return ret;
}

 

The interesting part here is current->bio_list, and it is pretty much the answer to the question raised earlier: while one submission is already in progress, any bio submitted from inside it (for example by a stacked driver) is simply appended to the per-task list instead of recursing, and the do/while loop drains that list one bio at a time. A small user-space simulation of this pattern is sketched below.
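To make the trick concrete, here is a small, self-contained user-space simulation of the current->bio_list pattern. Everything in it (struct fake_bio, handle_one(), submit()) is invented for illustration; only the control flow mirrors generic_make_request() above: the first caller installs a list, nested submissions only queue, and the top-level loop drains the list iteratively.

#include <stdio.h>
#include <stddef.h>

struct fake_bio {
	int id;
	int split_into;			/* how many child bios handling this one creates */
	struct fake_bio *next;
};

struct fake_bio_list {
	struct fake_bio *head, *tail;
};

/* Plays the role of current->bio_list: NULL means "no submission in progress". */
static struct fake_bio_list *task_bio_list;

static void submit(struct fake_bio *bio);

static void bio_list_add(struct fake_bio_list *l, struct fake_bio *bio)
{
	bio->next = NULL;
	if (l->tail)
		l->tail->next = bio;
	else
		l->head = bio;
	l->tail = bio;
}

static struct fake_bio *bio_list_pop(struct fake_bio_list *l)
{
	struct fake_bio *bio = l->head;

	if (bio) {
		l->head = bio->next;
		if (!l->head)
			l->tail = NULL;
		bio->next = NULL;
	}
	return bio;
}

/* The "driver": may generate child bios, like a stacked dm/md device would. */
static void handle_one(struct fake_bio *bio)
{
	static struct fake_bio children[8];
	int i;

	printf("handling bio %d\n", bio->id);
	for (i = 0; i < bio->split_into && i < 8; i++) {
		children[i].id = bio->id * 10 + i;
		children[i].split_into = 0;
		submit(&children[i]);	/* nested submission: only lands on the list */
	}
}

static void submit(struct fake_bio *bio)
{
	struct fake_bio_list list_on_stack;

	if (task_bio_list) {			/* already inside a submission: just queue */
		bio_list_add(task_bio_list, bio);
		return;
	}

	list_on_stack.head = list_on_stack.tail = NULL;
	task_bio_list = &list_on_stack;		/* like current->bio_list = &bio_list_on_stack */
	do {
		handle_one(bio);		/* may call submit() again, but never recurses deeper */
		bio = bio_list_pop(&list_on_stack);
	} while (bio);
	task_bio_list = NULL;			/* deactivate */
}

int main(void)
{
	struct fake_bio top = { .id = 1, .split_into = 3, .next = NULL };

	submit(&top);	/* prints: handling bio 1, 10, 11, 12 */
	return 0;
}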

 

