Linux文件系统学习:文件read和BIO调度分析(4)

  • A+
所属分类:Linux系统

 

/*
 * ext4_mpage_readpages - read pages of @mapping, batching them into bios.
 * @mapping:  address space being read; the inode is mapping->host
 * @pages:    optional list of pages.  When non-NULL, each iteration takes
 *            the page at the tail of the list (pages->prev), unlinks it and
 *            inserts it into the page cache before reading it.
 * @page:     the page to read when @pages is NULL
 * @nr_pages: number of pages to process (one per loop iteration)
 *
 * For every page the file blocks backing it are looked up with
 * ext4_map_blocks().  Pages whose blocks are physically contiguous are
 * appended to a shared bio, which is handed to ext4_submit_bio_read() when
 * it can no longer be extended.  Pages that cannot be handled this way
 * (the "confused" path) are read with block_read_full_page() instead.
 *
 * Always returns 0.  A failure on an individual page is recorded on that
 * page with SetPageError().
 */
int ext4_mpage_readpages(struct address_space *mapping,
			 struct list_head *pages, struct page *page,
			 unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	/* Last block placed in the current bio; used for the contiguity test. */
	sector_t last_block_in_bio = 0;

	struct inode *inode = mapping->host;
	/* blocksize == 1 << blkbits, so blkbits is log2 of the block size. */
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	/* Block numbers (from map.m_pblk) backing the current page. */
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	struct block_device *bdev = inode->i_sb->s_bdev;
	int length;
	unsigned relative_block = 0;
	/* Mapping result; deliberately kept across pages so it can be reused. */
	struct ext4_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
		int fully_mapped = 1;
		/* Index of the first unmapped block; blocks_per_page == none. */
		unsigned first_hole = blocks_per_page;

		prefetchw(&page->flags);
		if (pages) {
			/* Take the next page from the tail of the caller's list. */
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			/* If it cannot be added to the page cache, skip it. */
			if (add_to_page_cache_lru(page, mapping, page->index,
				  mapping_gfp_constraint(mapping, GFP_KERNEL)))
				goto next_page;
		}

		/* Pages that already carry buffers take the fallback path. */
		if (page_has_buffers(page))
			goto confused;

		/* First file block covered by this page. */
		block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
		/*
		 * One past the last block worth mapping: the end of the
		 * remaining pages, clamped to the file size rounded up to a
		 * whole block.
		 */
		last_block = block_in_file + nr_pages * blocks_per_page;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;
		page_block = 0;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & EXT4_MAP_MAPPED) &&
		    block_in_file > map.m_lblk &&
		    block_in_file < (map.m_lblk + map.m_len)) {
			/* Offset of this page's first block inside the old extent. */
			unsigned map_offset = block_in_file - map.m_lblk;
			/* Number of blocks left in the old extent from that offset. */
			unsigned last = map.m_len - map_offset;

			for (relative_block = 0; ; relative_block++) {
				if (relative_block == last) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				}
				if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk + map_offset +
					relative_block;
				page_block++;
				block_in_file++;
			}
		}

		/*
		 * Then do more ext4_map_blocks() calls until we are
		 * done with this page.
		 */
		while (page_block < blocks_per_page) {
			/*
			 * Only blocks before last_block are looked up.  For
			 * blocks at or past it, the checks below use whatever
			 * map.m_flags currently holds.
			 */
			if (block_in_file < last_block) {
				map.m_lblk = block_in_file;
				map.m_len = last_block - block_in_file;

				if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
				/*
				 * Shared error exit, also reached by goto from
				 * the bio allocation code further down: flag
				 * the page, zero it, unlock it and move on.
				 */
				set_error_page:
					SetPageError(page);
					zero_user_segment(page, 0,
							  PAGE_CACHE_SIZE);
					unlock_page(page);
					goto next_page;
				}
			}
			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
				/* Unmapped block: remember where the first hole starts. */
				fully_mapped = 0;
				if (first_hole == blocks_per_page)
					first_hole = page_block;
				page_block++;
				block_in_file++;
				continue;
			}
			if (first_hole != blocks_per_page)
				goto confused;		/* hole -> non-hole */

			/* Contiguous blocks? */
			if (page_block && blocks[page_block-1] != map.m_pblk-1)
				goto confused;
			/* Record block numbers until the extent or the page is used up. */
			for (relative_block = 0; ; relative_block++) {
				if (relative_block == map.m_len) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				} else if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk+relative_block;
				page_block++;
				block_in_file++;
			}
		}
		if (first_hole != blocks_per_page) {
			/* Zero the page from the first hole to its end. */
			zero_user_segment(page, first_hole << blkbits,
					  PAGE_CACHE_SIZE);
			if (first_hole == 0) {
				/* The whole page is a hole: no I/O is needed. */
				SetPageUptodate(page);
				unlock_page(page);
				goto next_page;
			}
		} else if (fully_mapped) {
			SetPageMappedToDisk(page);
		}
		/*
		 * With one block per page, try cleancache_get_page().  When it
		 * returns 0 the page is marked uptodate, and the confused path
		 * below then simply unlocks it.
		 * NOTE(review): presumably 0 means cleancache supplied the
		 * page contents - confirm against the cleancache API.
		 */
		if (fully_mapped && blocks_per_page == 1 &&
		    !PageUptodate(page) && cleancache_get_page(page) == 0) {
			SetPageUptodate(page);
			goto confused;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != blocks[0] - 1)) {
		/* Also reached by goto when bio_add_page() below falls short. */
		submit_and_realloc:
			ext4_submit_bio_read(bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct ext4_crypto_ctx *ctx = NULL;

			/* Encrypted regular files get a crypto context per bio. */
			if (ext4_encrypted_inode(inode) &&
			    S_ISREG(inode->i_mode)) {
				ctx = ext4_get_crypto_ctx(inode);
				if (IS_ERR(ctx))
					goto set_error_page;
			}
			/* Request room for the remaining pages, capped at BIO_MAX_PAGES. */
			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, BIO_MAX_PAGES));
			if (!bio) {
				if (ctx)
					ext4_release_crypto_ctx(ctx);
				goto set_error_page;
			}
			bio->bi_bdev = bdev;
			/*
			 * Shifting left by (blkbits - 9) multiplies the block
			 * number by blocksize / 512, i.e. it converts the
			 * page's first block number into 512-byte units.
			 */
			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
			bio->bi_end_io = mpage_end_io;
			bio->bi_private = ctx;
		}

		/* Bytes to read: up to the first hole, or the whole page. */
		length = first_hole << blkbits;
		/* If the bio could not take all of it, submit it and retry. */
		if (bio_add_page(bio, page, length, 0) < length)
			goto submit_and_realloc;

		/*
		 * Submit right away when the mapping carries
		 * EXT4_MAP_BOUNDARY and was fully consumed, or when the page
		 * ends in a hole.  Otherwise remember the page's last block
		 * for the contiguity test on the next page.
		 */
		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
		     (relative_block == map.m_len)) ||
		    (first_hole != blocks_per_page)) {
			ext4_submit_bio_read(bio);
			bio = NULL;
		} else
			last_block_in_bio = blocks[blocks_per_page - 1];
		goto next_page;
	/*
	 * Fallback: submit any bio built so far, then read this page with
	 * block_read_full_page() unless it is already uptodate.
	 */
	confused:
		if (bio) {
			ext4_submit_bio_read(bio);
			bio = NULL;
		}
		if (!PageUptodate(page))
			block_read_full_page(page, ext4_get_block);
		else
			unlock_page(page);
	next_page:
		/* Pages taken from the list are released here. */
		if (pages)
			page_cache_release(page);
	}
	/* Every page on the caller's list must have been consumed. */
	BUG_ON(pages && !list_empty(pages));
	/* Submit whatever is still pending. */
	if (bio)
		ext4_submit_bio_read(bio);
	return 0;
}


/*
 * bio_add_page - attach @len bytes of @page, starting at @offset, to @bio.
 *
 * The data is either merged into the last bio_vec (same page, directly
 * following offset) or stored in the next free slot of bio->bi_io_vec.
 *
 * Returns @len on success.  Returns 0 when @bio is flagged BIO_CLONED or
 * when a new slot is needed but bi_vcnt has reached bi_max_vecs.
 */
int bio_add_page(struct bio *bio, struct page *page,
		 unsigned int len, unsigned int offset)
{
	struct bio_vec *tail;
	struct bio_vec *slot;

	/* A cloned bio must leave its vec list untouched. */
	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return 0;

	/*
	 * Filesystems whose block size is smaller than the page size tend
	 * to call us repeatedly with the same page and a consecutive
	 * offset; in that case just grow the last vector.
	 */
	if (bio->bi_vcnt > 0) {
		tail = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (tail->bv_page == page &&
		    tail->bv_offset + tail->bv_len == offset) {
			tail->bv_len += len;
			bio->bi_iter.bi_size += len;
			return len;
		}
	}

	/* No merge possible and no free slot left. */
	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	/* Fill in the next free slot of the vector array. */
	slot = &bio->bi_io_vec[bio->bi_vcnt];
	slot->bv_page = page;
	slot->bv_offset = offset;
	slot->bv_len = len;
	bio->bi_vcnt++;

	bio->bi_iter.bi_size += len;
	return len;
}

//
submit(bio)

/****************************************驱动程序****************************************************************/
/*
 * Driver-side excerpt: walk every segment of a bio and copy data between
 * the segment's page and the device memory at pdev->data.
 * NOTE(review): this is a fragment.  The enclosing function, the
 * declarations of pdev, bio, i, pRHdata and pBuffer, and the "out" label
 * are not shown here; getbio() appears to stand in for however the driver
 * obtains the bio - confirm against the full driver source.
 */
getbio();
 struct bio_vec* bvec; 
 /* Start address in device memory: sector number times the sector size. */
 pRHdata = pdev->data + (bio->bi_sector * RAMHD_SECTOR_SIZE); 
  bio_for_each_segment(bvec, bio, i){ 
    /* Map the segment's page and step to the segment's data inside it. */
    pBuffer = kmap(bvec->bv_page) + bvec->bv_offset; 
    switch(bio_data_dir(bio)){ 
      /* READ: device memory -> page. */
      case READ: 
        memcpy(pBuffer, pRHdata, bvec->bv_len); 
        flush_dcache_page(bvec->bv_page); 
        break; 
          
      /* WRITE: page -> device memory. */
      case WRITE: 
        flush_dcache_page(bvec->bv_page); 
        memcpy(pRHdata, pBuffer, bvec->bv_len); 
        break; 
          
      /* Any other direction: undo the mapping and leave the loop. */
      default: 
        kunmap(bvec->bv_page); 
        goto out; 
    } 
      
    kunmap(bvec->bv_page); 
    /* Advance the device position by the bytes just copied. */
    pRHdata += bvec->bv_len; 
  }

 

  • 首先是疑问,就是在准备探索cfq和红黑树的时候,知道红黑树应该是根据磁盘的bi_sector进行树的组织的,那么这个bi_sector是哪里来的,
所以找回去readpage流程,
  • 第二个疑问:然后之前也一直疑问bio,应该是好几个page组成的,之前没有看到,所以就用单个bio去探索也不影响分析代码,今天顺便一起看下

第一问解答:

从上面代码就知道了,第一个标黑体的地方和第二个标记黑体的地方就是bi_sector的由来

const unsigned blkbits = inode->i_blkbits;

bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);

虽然还有些模糊,比如blkbits 来自inode->i_blkbits,是块大小以2为底的对数(blocksize = 1 << blkbits),而blocks[0] 是ext4_map_blocks映射得到的物理块号,相当于硬盘位置,但是这个inode又是最终是怎么来的呢?还需要探索。

还有减去9,还有blocks 左移是什么意思:2^9 = 512,左移(blkbits - 9)位相当于乘以 blocksize / 512,也就是把以块为单位的块号换算成以512字节扇区为单位的扇区号。

 

第二问解答:

bio_alloc

bio_add_page

 

如果bio还是空的,就用bio_alloc开辟一个,然后后面的page用bio_add_page,

然后在bio_add_page函数中我们对比bvec的小节点的数据流动,很清楚的看到,每个page的数据指针进行的数据传递,其实是个数组,并没有组织成链表。

 

# Linux必备书籍推荐

 

《LINUX内核源代码情景分析(上册) 》毛德操,胡希明      >>>京东购买     >>>淘宝购买 

《LINUX内核源代码情景分析(下册) 》毛德操,胡希明      >>>京东购买     >>>淘宝购买

《嵌入式Linux应用开发完全手册 》韦东山 著     >>>京东购买     >>>淘宝购买 领券

《深入理解Linux内核第3版》(美)博韦      >>>京东购买     >>>淘宝购买    

《鸟哥的Linux私房菜:基础学习篇(第四版)》鸟哥      >>>京东购买     >>>淘宝购买

 

#免费电子书领取

神农笔记微信公众号

扫一扫关注微信公众号,上述5本电子书免费领取。

https://pan.baidu.com/s/1q5IjXAmybs8NBseR4R8Ksg

扫码关注微信公众号,回复“Linux” ,即可获取提取码

 

--- Linux文件系统学习系列笔记 ---

(原创笔记,转载请联系博主授权)

Linux文件系统学习:整体框架图(1)

Linux文件系统学习:初始化过程(2)

Linux文件系统学习:文件read流程分析(3)

Linux文件系统学习:文件read和BIO调度分析(4)

Linux文件系统学习:文件write过程分析(5)

Linux文件系统学习:io调度框架(6)

Linux文件系统学习:io的提交过程(7)

Linux文件系统学习:io的plug过程-启动篇(8)

Linux文件系统学习:io的plug过程-request请求(9)

Linux文件系统学习:io的plug过程-blk_init_queue(10)

Linux文件系统学习:io的plug过程-blk_flush_plug_list的情况(11)

Linux文件系统学习:io的plug过程-queuelist的问题(12)

Linux文件系统学习:电梯算法简介(13)

Linux文件系统学习:电梯算法noop(14)

Linux文件系统学习:电梯算法deadline(15)

 

<欢迎关注微信公众号,第一时间查看最新内容>

神农笔记微信公众号

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: