Linux | 块设备了解之三层结构

Linux系统
405
0
0
2023-04-30

块设备是文件系统的底层支撑,完成数据的存储和访问。块设备也能脱离文件系统,以裸设备的形式工作。

块设备以“块”为最小访问单元,可随机访问(定位寻址),逻辑上具有“通用设备层、物理磁盘、分区”等概念。使用工作队列来完成延迟的IO操作。

块设备的处理流程较为复杂,包括通用块层,IO调度层和块设备驱动程序的三层组织架构。

img

通用块层负责完成bio到request请求的转换和组织。

  • 物理磁盘gendisk
  • 逻辑分区对象hd_struct(注:自Linux 5.11起,hd_struct已合并进block_device;在5.15中分区由block_device表示)
  • 块设备对象block_device
  • 物理磁盘请求队列request_queue
  • ~/Downloads/research/linux-5.15.4/include/linux/genhd.h
/*
 * Descriptor for one disk, excerpted from include/linux/genhd.h
 * (linux-5.15.4).  It carries the device numbers, the disk name, a
 * pointer to a block_device_operations table, the request queue pointer
 * and assorted bookkeeping state.
 */
struct gendisk {
  /* major, first_minor and minors are input parameters only,
   * don't use directly.  Use disk_devt() and disk_max_parts().
   */
  int major;      /* major number of driver (major device number) */
  int first_minor;   /* first minor device number associated with the disk */
  int minors;     /* maximum number of minors, =1 for
                   * disks that can't be partitioned. */

  char disk_name[DISK_NAME_LEN];  /* name of major driver (standard disk name) */

  unsigned short events;    /* supported events */
  unsigned short event_flags;  /* flags related to event processing */

  /* NOTE(review): presumably the partitions of this disk, with part0
   * standing for the whole device -- confirm against block/genhd.c. */
  struct xarray part_tbl;
  struct block_device *part0;

  const struct block_device_operations *fops;
  struct request_queue *queue;
  void *private_data;

  int flags;
  unsigned long state;
  /* NOTE(review): the GD_* values look like bit numbers within 'state'
   * rather than masks -- confirm at the use sites. */
#define GD_NEED_PART_SCAN    0
#define GD_READ_ONLY      1
#define GD_DEAD        2

  struct mutex open_mutex;  /* open/close mutex */
  unsigned open_partitions;  /* number of open partitions */

  struct backing_dev_info  *bdi;
  struct kobject *slave_dir;
  /* Only present when CONFIG_BLOCK_HOLDER_DEPRECATED is defined. */
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
  struct list_head slave_bdevs;
#endif
  struct timer_rand_state *random;
  atomic_t sync_io;    /* RAID */
  struct disk_events *ev;
  /* Only present when CONFIG_BLK_DEV_INTEGRITY is defined. */
#ifdef  CONFIG_BLK_DEV_INTEGRITY
  struct kobject integrity_kobj;
#endif  /* CONFIG_BLK_DEV_INTEGRITY */
  /* Only present when CONFIG_CDROM is enabled. */
#if IS_ENABLED(CONFIG_CDROM)
  struct cdrom_device_info *cdi;
#endif
  int node_id;
  struct badblocks *bb;
  struct lockdep_map lockdep_map;
  u64 diskseq;
};

~/Downloads/research/linux-5.15.4/include/linux/fs.h

/**
 * struct address_space - Contents of a cacheable, mappable object.
 * @host: Owner, either the inode or the block_device.
 * @i_pages: Cached pages.
 * @invalidate_lock: Guards coherency between page cache contents and
 *   file offset->disk block mappings in the filesystem during invalidates.
 *   It is also used to block modification of page cache contents through
 *   memory mappings.
 * @gfp_mask: Memory allocation flags to use for allocating pages.
 * @i_mmap_writable: Number of VM_SHARED mappings.
 * @nr_thps: Number of THPs in the pagecache (non-shmem only).
 * @i_mmap: Tree of private and shared mappings.
 * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
 * @nrpages: Number of page entries, protected by the i_pages lock.
 * @writeback_index: Writeback starts here.
 * @a_ops: Methods.
 * @flags: Error bits and flags (AS_*).
 * @wb_err: The most recent error which has occurred.
 * @private_lock: For use by the owner of the address_space.
 * @private_list: For use by the owner of the address_space.
 * @private_data: For use by the owner of the address_space.
 */
struct address_space {
  struct inode    *host;  /* owner: the inode or the block_device */
  struct xarray    i_pages;  /* cached pages */
  struct rw_semaphore  invalidate_lock;  /* page cache vs. offset->block mapping coherency */
  gfp_t      gfp_mask;  /* allocation flags used when allocating pages */
  atomic_t    i_mmap_writable;  /* number of VM_SHARED mappings */
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
  /* number of thp, only for non-shmem files */
  atomic_t    nr_thps;
#endif
  struct rb_root_cached  i_mmap;  /* tree of private and shared mappings */
  struct rw_semaphore  i_mmap_rwsem;  /* protects i_mmap and i_mmap_writable */
  unsigned long    nrpages;  /* number of page entries, protected by the i_pages lock */
  pgoff_t      writeback_index;  /* writeback starts here */
  const struct address_space_operations *a_ops;  /* methods */
  unsigned long    flags;  /* error bits and flags (AS_*) */
  errseq_t    wb_err;  /* most recent error which has occurred */
  /* The three private_* members are for use by the owner of the address_space. */
  spinlock_t    private_lock;
  struct list_head  private_list;
  void      *private_data;
  /*
   * The attributes on the closing brace request an alignment of
   * sizeof(long) and tag the struct with __randomize_layout.
   */
} __attribute__((aligned(sizeof(long)))) __randomize_layout;

~/Downloads/research/linux-5.15.4/include/linux/fs.h

/*
 * Keep mostly read-only and often accessed (especially for
 * the RCU path lookup and 'stat' data) fields at the beginning
 * of the 'struct inode'
 */
struct inode {
  /*
   * Excerpt of the inode descriptor from include/linux/fs.h
   * (linux-5.15.4).  Several members exist only under the CONFIG_*
   * options that guard them, so the size of the struct depends on the
   * kernel configuration.
   */
  umode_t      i_mode;
  unsigned short    i_opflags;
  kuid_t      i_uid;
  kgid_t      i_gid;
  unsigned int    i_flags;

#ifdef CONFIG_FS_POSIX_ACL
  struct posix_acl  *i_acl;
  struct posix_acl  *i_default_acl;
#endif

  const struct inode_operations  *i_op;
  struct super_block  *i_sb;
  /*
   * Pointer to an address_space; note that an address_space is also
   * embedded by value further down as i_data.
   * NOTE(review): presumably i_mapping normally points at i_data -- confirm.
   */
  struct address_space  *i_mapping;

#ifdef CONFIG_SECURITY
  void      *i_security;
#endif

  /* Stat data, not accessed from path walking */
  unsigned long    i_ino;
  /*
   * Filesystems may only read i_nlink directly.  They shall use the
   * following functions for modification:
   *
   *    (set|clear|inc|drop)_nlink
   *    inode_(inc|dec)_link_count
   */
  /* Anonymous union: a const view and a writable view of the same storage. */
  union {
    const unsigned int i_nlink;
    unsigned int __i_nlink;
  };
  dev_t      i_rdev;
  loff_t      i_size;
  struct timespec64  i_atime;
  struct timespec64  i_mtime;
  struct timespec64  i_ctime;
  spinlock_t    i_lock;  /* i_blocks, i_bytes, maybe i_size */
  unsigned short          i_bytes;
  u8      i_blkbits;
  u8      i_write_hint;
  blkcnt_t    i_blocks;

  /* Sequence counter, present only when __NEED_I_SIZE_ORDERED is defined. */
#ifdef __NEED_I_SIZE_ORDERED
  seqcount_t    i_size_seqcount;
#endif

  /* Misc */
  unsigned long    i_state;
  struct rw_semaphore  i_rwsem;

  unsigned long    dirtied_when;  /* jiffies of first dirtying */
  unsigned long    dirtied_time_when;

  struct hlist_node  i_hash;
  struct list_head  i_io_list;  /* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
  struct bdi_writeback  *i_wb;    /* the associated cgroup wb */

  /* foreign inode detection, see wbc_detach_inode() */
  int      i_wb_frn_winner;
  u16      i_wb_frn_avg_time;
  u16      i_wb_frn_history;
#endif
  struct list_head  i_lru;    /* inode LRU list */
  struct list_head  i_sb_list;
  struct list_head  i_wb_list;  /* backing dev writeback list */
  /* Anonymous union: i_dentry and i_rcu occupy the same storage. */
  union {
    struct hlist_head  i_dentry;
    struct rcu_head    i_rcu;
  };
  atomic64_t    i_version;
  atomic64_t    i_sequence; /* see futex */
  atomic_t    i_count;
  atomic_t    i_dio_count;
  atomic_t    i_writecount;
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
  atomic_t    i_readcount; /* struct files open RO */
#endif
  /* Anonymous union: the file_operations pointer shares storage with free_inode. */
  union {
    const struct file_operations  *i_fop;  /* former ->i_op->default_file_ops */
    void (*free_inode)(struct inode *);
  };
  struct file_lock_context  *i_flctx;
  struct address_space  i_data;  /* embedded by value, unlike the i_mapping pointer */
  struct list_head  i_devices;
  /*
   * Anonymous union: only one of these members is meaningful at a time.
   * NOTE(review): presumably selected by the inode's file type -- confirm.
   */
  union {
    struct pipe_inode_info  *i_pipe;
    struct cdev    *i_cdev;
    char      *i_link;
    unsigned    i_dir_seq;
  };

  __u32      i_generation;

#ifdef CONFIG_FSNOTIFY
  __u32      i_fsnotify_mask; /* all events this inode cares about */
  struct fsnotify_mark_connector __rcu  *i_fsnotify_marks;
#endif

#ifdef CONFIG_FS_ENCRYPTION
  struct fscrypt_info  *i_crypt_info;
#endif

#ifdef CONFIG_FS_VERITY
  struct fsverity_info  *i_verity_info;
#endif

  void      *i_private; /* fs or device private pointer */
} __randomize_layout;

~/Downloads/research/linux-5.15.4/include/linux/fs.h

struct file {
  /*
   * Excerpt of the open-file descriptor from include/linux/fs.h
   * (linux-5.15.4).
   */
  /* Named union f_u: the llist node and the RCU head share storage. */
  union {
    struct llist_node  fu_llist;
    struct rcu_head   fu_rcuhead;
  } f_u;
  struct path    f_path;
  /* NOTE(review): "cached" presumably means derived from f_path -- confirm. */
  struct inode    *f_inode;  /* cached value */
  const struct file_operations  *f_op;

  /*
   * Protects f_ep, f_flags.
   * Must not be taken from IRQ context.
   */
  spinlock_t    f_lock;
  enum rw_hint    f_write_hint;
  atomic_long_t    f_count;
  unsigned int     f_flags;
  fmode_t      f_mode;
  struct mutex    f_pos_lock;
  loff_t      f_pos;
  struct fown_struct  f_owner;
  const struct cred  *f_cred;
  struct file_ra_state  f_ra;

  u64      f_version;
#ifdef CONFIG_SECURITY
  void      *f_security;
#endif
  /* needed for tty driver, and maybe others */
  void      *private_data;

#ifdef CONFIG_EPOLL
  /* Used by fs/eventpoll.c to link all the hooks to this file */
  struct hlist_head  *f_ep;
#endif /* #ifdef CONFIG_EPOLL */
  /* Pointer to an address_space; nothing is embedded by value here. */
  struct address_space  *f_mapping;
  errseq_t    f_wb_err;
  errseq_t    f_sb_err; /* for syncfs */
  /* The attributes on the closing brace tag the layout as randomizable and force 4-byte alignment. */
} __randomize_layout
  __attribute__((aligned(4)));  /* lest something weird decides that 2 is OK */