Commit aa719a6f authored by 操斧作斤二分队's avatar 操斧作斤二分队
Browse files

Upload New File

parent d64bf086
No related merge requests found
Showing with 509 additions and 0 deletions
+509 -0
# Technical Report
## Synchronous IO
### VFS to FS
A synchronous IO can be identified by the current process id (pid) because the process will be blocked when a synchronous IO occurs.
**Weakness:** 如果一个进程发起了一个同步 IO 操作,然后该进程退出,并且其进程 ID 被一个新的进程复用,那么可能会出现两个不同的 IO 操作具有相同的 "标识符"。(理论上存在,但在实际情况中几乎不可能出现)
In Linux 5.15.1, there are three important branches in vfs_read() as follows:
```c
if (file->f_op->read)
ret = file->f_op->read(file, buf, count, pos);
else if (file->f_op->read_iter)
ret = new_sync_read(file, buf, count, pos);
else
ret = -EINVAL;
```
For example, if the file being read resides on an ext4 file system, `vfs_read()` takes the second branch when all goes well, because `ext4_file_operations` does not define `file->f_op->read`.
We first analyze the ext4 file system:
The functions called by a synchronous IO are as follows (for ext4 fs):
```
------------------------------------------ File System Layer ------------------------------------------------------
|-- vfs_read
|--- file->f_op->read (×)
|--- new_sync_read
|-- call_read_iter
|-- file->f_op->read_iter (ext4_file_read_iter)
|-- ext4_dax_read_iter (DAX)
|-- ext4_dio_read_iter (Direct IO)
|--
|-- generic_file_read_iter
|-- Direct IO (generic fallback: some file systems have no direct IO handler of their own)
|-- filemap_read (read data from page cache)
|-- filemap_get_pages
|-- filemap_get_read_batch
|-- page_cache_get_speculative
|-- __page_cache_add_speculative
|-- page_cache_sync_readahead
|-- page_cache_sync_ra
|-- force_page_cache_ra
|-- ext4_readahead
......
|-- ondemand_readahead
|-- do_page_cache_ra
|-- page_cache_ra_unbounded
|-- read_pages
|-- ext4_readahead
|-- ext4_mpage_readpages
|-- filemap_create_page
|-- filemap_read_page
|-- mapping->a_ops->readpage (ext4_readpage)
|-- ext4_readpage_inline
|-- ext4_mpage_readpages
|-- submit_bio
------------------------------------------ File System Layer ------------------------------------------------------
--------------------------------------------- Block Layer ---------------------------------------------------------
|-- submit_bio
|-- submit_bio_noacct
|-- __submit_bio_noacct [it will not be called for scsi sd device]
|-- __submit_bio
......
|-- __submit_bio_noacct_mq
|-- __submit_bio
|-- disk->fops->submit_bio [null for scsi sd device]
|-- blk_mq_submit_bio
|-- blk_queue_bounce [bouncing]
|-- __blk_queue_split
|-- blk_attempt_plug_merge
|-- blk_try_merge
|-- blk_insert_flush & blk_mq_run_hw_queue
|-- blk_add_rq_to_plug
|-- blk_mq_flush_plug_list
|-- blk_mq_sched_insert_requests
|-- e->type->ops.insert_requests(dd_insert_requests)
|-- dd_insert_request
|-- ...
|-- blk_mq_try_issue_directly
|-- blk_mq_sched_insert_request
|-- blk_mq_request_bypass_insert & blk_mq_run_hw_queue
|-- __blk_mq_insert_request
|-- e->type->ops.insert_requests (dd_insert_requests)
|-- dd_insert_request
--------------------------------------------- Block Layer ---------------------------------------------------------
--------------------------------------------- SCSI MPTSPI Driver --------------------------------------------------
|-- mpt_interrupt
|-- mpt_reply
|-- MptCallbacks[cb_idx](ioc, mf, mr) [mptscsih_io_done]
|-- scsi_done [scsi_mq_done]
|-- blk_mq_complete_request_remote
|-- blk_mq_complete_send_ipi
|-- __blk_mq_complete_request_remote
|-- __raise_softirq_irqoff(BLOCK_SOFTIRQ)
|-- blk_done_softirq
......
|-- blk_mq_raise_softirq
|-- raise_softirq(BLOCK_SOFTIRQ)
|-- blk_done_softirq
|-- blk_complete_reqs
|-- rq->q->mq_ops->complete[scsi_complete]
|-- scsi_finish_command
|-- scsi_io_completion
|-- scsi_end_request
|-- rq->end_io[bio_endio]
|-- scsi_queue_insert
|-- scsi_eh_scmd_add
|-- rq->q->mq_ops->complete []
|-- mpt_turbo_reply
--------------------------------------------- SCSI MPTSPI Driver --------------------------------------------------
```
```shell
name: block_rq_remap
ID: 1181
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:dev_t old_dev; offset:28; size:4; signed:0;
field:sector_t old_sector; offset:32; size:8; signed:0;
field:unsigned int nr_bios; offset:40; size:4; signed:0;
field:char rwbs[8]; offset:44; size:8; signed:1;
print fmt: "%d,%d %s %llu + %u <- (%d,%d) %llu %u", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, ((unsigned int) ((REC->old_dev) >> 20)), ((unsigned int) ((REC->old_dev) & ((1U << 20) - 1))), (unsigned long long)REC->old_sector, REC->nr_bios
```
```shell
name: block_bio_remap
ID: 1182
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:dev_t old_dev; offset:28; size:4; signed:0;
field:sector_t old_sector; offset:32; size:8; signed:0;
field:char rwbs[8]; offset:40; size:8; signed:1;
print fmt: "%d,%d %s %llu + %u <- (%d,%d) %llu", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, ((unsigned int) ((REC->old_dev) >> 20)), ((unsigned int) ((REC->old_dev) & ((1U << 20) - 1))), (unsigned long long)REC->old_sector
```
```shell
name: block_split
ID: 1183
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:sector_t new_sector; offset:24; size:8; signed:0;
field:char rwbs[8]; offset:32; size:8; signed:1;
field:char comm[16]; offset:40; size:16; signed:1;
print fmt: "%d,%d %s %llu / %llu [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, (unsigned long long)REC->new_sector, REC->comm
blk_queue_split/blk_mq_submit_bio -> __blk_queue_split -> block_split
```
```shell
name: block_unplug
ID: 1184
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:int nr_rq; offset:8; size:4; signed:1;
field:char comm[16]; offset:12; size:16; signed:1;
print fmt: "[%s] %d", REC->comm, REC->nr_rq
blk_mq_flush_plug_list -> block_unplug
blk_mq_submit_bio -> block_unplug
```
```shell
name: block_plug
ID: 1185
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:char comm[16]; offset:8; size:16; signed:1;
print fmt: "[%s]", REC->comm
blk_mq_submit_bio -> block_plug
```
```shell
name: block_getrq
ID: 1186
format:
#
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
# 目标设备号
field:dev_t dev; offset:8; size:4; signed:0;
# I/O 请求的起始扇区
field:sector_t sector; offset:16; size:8; signed:0;
# I/O 请求的扇区数
field:unsigned int nr_sector; offset:24; size:4; signed:0;
# I/O 类型和属性
field:char rwbs[8]; offset:28; size:8; signed:1;
# 进程名
field:char comm[16]; offset:36; size:16; signed:1;
print fmt: "%d,%d %s %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, REC->comm
blk_mq_submit_bio
```
```shell
name: block_bio_queue
ID: 1187
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:char rwbs[8]; offset:28; size:8; signed:1;
field:char comm[16]; offset:36; size:16; signed:1;
print fmt: "%d,%d %s %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, REC->comm
submit_bio_noacct -> __submit_bio_noacct/__submit_bio_noacct_mq -> __submit_bio -> submit_bio_checks -> block_bio_queue
```
```shell
name: block_bio_frontmerge
ID: 1188
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:char rwbs[8]; offset:28; size:8; signed:1;
field:char comm[16]; offset:36; size:16; signed:1;
print fmt: "%d,%d %s %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, REC->comm
blk_attempt_bio_merge/blk_mq_sched_try_merge -> bio_attempt_front_merge -> block_bio_frontmerge
```
```shell
name: block_bio_backmerge
ID: 1189
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:char rwbs[8]; offset:28; size:8; signed:1;
field:char comm[16]; offset:36; size:16; signed:1;
print fmt: "%d,%d %s %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, REC->comm
```
```shell
name: block_bio_bounce
ID: 1190
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:char rwbs[8]; offset:28; size:8; signed:1;
field:char comm[16]; offset:36; size:16; signed:1;
print fmt: "%d,%d %s %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, REC->comm
blk_queue_bounce -> __blk_queue_bounce -> block_bio_bounce
```
```shell
name: block_bio_complete
ID: 1191
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned nr_sector; offset:24; size:4; signed:0;
field:int error; offset:28; size:4; signed:1;
field:char rwbs[8]; offset:32; size:8; signed:1;
print fmt: "%d,%d %s %llu + %u [%d]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, (unsigned long long)REC->sector, REC->nr_sector, REC->error
bio_endio -> block_bio_complete
```
```shell
name: block_rq_merge
ID: 1192
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:unsigned int bytes; offset:28; size:4; signed:0;
field:char rwbs[8]; offset:32; size:8; signed:1;
field:char comm[16]; offset:40; size:16; signed:1;
field:__data_loc char[] cmd; offset:56; size:4; signed:1;
print fmt: "%d,%d %s %u (%s) %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, REC->bytes, __get_str(cmd), (unsigned long long)REC->sector, REC->nr_sector, REC->comm
attempt_back_merge/attempt_front_merge/blk_attempt_req_merge->attempt_merge -> block_rq_merge
```
```shell
name: block_rq_issue
ID: 1193
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:unsigned int bytes; offset:28; size:4; signed:0;
field:char rwbs[8]; offset:32; size:8; signed:1;
field:char comm[16]; offset:40; size:16; signed:1;
field:__data_loc char[] cmd; offset:56; size:4; signed:1;
print fmt: "%d,%d %s %u (%s) %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, REC->bytes, __get_str(cmd), (unsigned long long)REC->sector, REC->nr_sector, REC->comm
blk_mq_start_request -> block_rq_issue
```
```shell
name: block_rq_insert
ID: 1194
usage: "插入请求队列"
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
# I/O 请求的字节数
field:unsigned int bytes; offset:28; size:4; signed:0;
field:char rwbs[8]; offset:32; size:8; signed:1;
field:char comm[16]; offset:40; size:16; signed:1;
field:__data_loc char[] cmd; offset:56; size:4; signed:1;
print fmt: "%d,%d %s %u (%s) %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, REC->bytes, __get_str(cmd), (unsigned long long)REC->sector, REC->nr_sector, REC->comm
__blk_mq_insert_req_list -> block_rq_insert
blk_mq_insert_requests -> block_rq_insert
```
```shell
name: block_rq_complete
ID: 1195
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:int error; offset:28; size:4; signed:1;
field:char rwbs[8]; offset:32; size:8; signed:1;
field:__data_loc char[] cmd; offset:40; size:4; signed:1;
print fmt: "%d,%d %s (%s) %llu + %u [%d]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, __get_str(cmd), (unsigned long long)REC->sector, REC->nr_sector, REC->error
scsi_end_request -> blk_update_request -> block_rq_complete
```
```shell
name: block_rq_requeue
ID: 1196
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:unsigned int nr_sector; offset:24; size:4; signed:0;
field:char rwbs[8]; offset:28; size:8; signed:1;
field:__data_loc char[] cmd; offset:36; size:4; signed:1;
print fmt: "%d,%d %s (%s) %llu + %u [%d]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, __get_str(cmd), (unsigned long long)REC->sector, REC->nr_sector, 0
blk_mq_requeue_request/blk_mq_handle_dev_resource/blk_mq_handle_zone_resource/__blk_mq_issue_directly -> __blk_mq_requeue_request -> block_rq_requeue
```
```shell
name: block_dirty_buffer
ID: 1197
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:size_t size; offset:24; size:8; signed:0;
print fmt: "%d,%d sector=%llu size=%zu", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), (unsigned long long)REC->sector, REC->size
mark_buffer_dirty -> block_dirty_buffer
```
```shell
name: block_touch_buffer
ID: 1198
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:dev_t dev; offset:8; size:4; signed:0;
field:sector_t sector; offset:16; size:8; signed:0;
field:size_t size; offset:24; size:8; signed:0;
print fmt: "%d,%d sector=%llu size=%zu", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), (unsigned long long)REC->sector, REC->size
touch_buffer -> block_touch_buffer
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment