之前在“Docker镜像存储分析”中已经介绍过Docker是如何组织镜像存储目录的。在Docker中,主要有三个store来负责对镜像进行管理:

  1. imageStore: 负责imagedb目录管理;
  2. layerStore: 负责layerdb目录管理;
  3. referenceStore: 负责repositories.json文件管理。

本次分析将介绍imageStore相关的知识。

imagedb

Docker1.12.3把镜像相关的内容放在/var/lib/docker/image/aufs/imagedb目录下。主要有content和metadata两个目录,content目录存储镜像config文件,metadata目录存储镜像的parent文件。

fs

先来看fs。fs是直接和imagedb目录打交道的模块。fs定义在/image/fs.go中:

1
2
3
4
5
// fs implements StoreBackend using the filesystem.
// The embedded RWMutex is taken by the fs methods around their disk accesses;
// root is the base directory under which the content and metadata
// directories are created.
type fs struct {
sync.RWMutex
root string
}

fs的root字段表示imagedb目录,即/var/lib/docker/image/aufs/imagedb/。
fs实现了StoreBackend接口:

1
2
3
4
5
6
7
8
9
10
// StoreBackend provides interface for image.Store persistence.
// The per-method notes below describe how the filesystem-backed
// implementation (fs) in this package behaves.
type StoreBackend interface {
// Walk calls f for each stored image ID and stops at the first error f returns.
Walk(f IDWalkFunc) error
// Get returns the content stored under id.
Get(id ID) ([]byte, error)
// Set stores data and returns the ID derived from its digest.
Set(data []byte) (ID, error)
// Delete removes the content and the metadata associated with id.
Delete(id ID) error
// SetMetadata stores data under key for id.
SetMetadata(id ID, key string, data []byte) error
// GetMetadata returns the data stored under key for id.
GetMetadata(id ID, key string) ([]byte, error)
// DeleteMetadata removes the data stored under key for id.
DeleteMetadata(id ID, key string) error
}

NewFSStoreBackend()

NewFSStoreBackend()可以生成一个新的fs:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// NewFSStoreBackend returns a filesystem-backed StoreBackend rooted at root.
//
// On failure it returns a literal nil interface together with the error.
// Returning newFSStore's (*fs)(nil) directly would wrap a nil pointer in a
// non-nil StoreBackend, which a caller comparing the result against nil
// would misread as a usable backend.
func NewFSStoreBackend(root string) (StoreBackend, error) {
	backend, err := newFSStore(root)
	if err != nil {
		return nil, err
	}
	return backend, nil
}
// newFSStore builds an fs rooted at root after making sure that the content
// and metadata directories for the canonical digest algorithm exist under it.
// Directories are created with mode 0700; the first creation error is returned.
func newFSStore(root string) (*fs, error) {
	algo := string(digest.Canonical)

	// Content first, then metadata.
	for _, sub := range []string{contentDirName, metadataDirName} {
		if err := os.MkdirAll(filepath.Join(root, sub, algo), 0700); err != nil {
			return nil, err
		}
	}
	return &fs{root: root}, nil
}

可以看出,NewFSStoreBackend()调用了newFSStore(),newFSStore()也只是创建了content和metadata目录。

路径组装方法

路径组装有两个方法:contentFile()和metadataDir(),分别用于拼出content目录下某个id对应的config文件路径,以及metadata目录下该id对应的子目录路径。

1
2
3
4
5
6
7
8
9
10
11
12
13
// contentFile returns the path of the file that holds the image config for id:
// <root>/<contentDirName>/<algorithm>/<hex>, for example
// /var/lib/docker/image/aufs/imagedb/content/sha256/55552ff902826130b2efb07df7d69a4fe56e5d42abf2577e5c0d1d44154ea8ed.
func (s *fs) contentFile(id ID) string {
	d := digest.Digest(id)
	algo, sum := string(d.Algorithm()), d.Hex()
	return filepath.Join(s.root, contentDirName, algo, sum)
}
// metadataDir returns the directory that holds the metadata files for id
// (such as the "parent" file): <root>/<metadataDirName>/<algorithm>/<hex>,
// for example
// /var/lib/docker/image/aufs/imagedb/metadata/sha256/55552ff902826130b2efb07df7d69a4fe56e5d42abf2577e5c0d1d44154ea8ed/.
func (s *fs) metadataDir(id ID) string {
	d := digest.Digest(id)
	algo, sum := string(d.Algorithm()), d.Hex()
	return filepath.Join(s.root, metadataDirName, algo, sum)
}

Walk()

Walk()方法实现了对content目录的遍历。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Walk calls the supplied callback for each image ID in the storage backend.
// Only the canonical digest (sha256) directory is listed. Entries whose name
// does not form a valid digest are logged at debug level and skipped. The
// walk stops at, and returns, the first error reported by f.
func (s *fs) Walk(f IDWalkFunc) error {
	contentDir := filepath.Join(s.root, contentDirName, string(digest.Canonical))

	// The read lock covers only the directory listing; callbacks run unlocked.
	s.RLock()
	entries, err := ioutil.ReadDir(contentDir)
	s.RUnlock()
	if err != nil {
		return err
	}

	for _, entry := range entries {
		// Rebuild the full digest ("sha256:<hex>") from the file name.
		candidate := digest.NewDigestFromHex(string(digest.Canonical), entry.Name())
		if verr := candidate.Validate(); verr != nil {
			logrus.Debugf("Skipping invalid digest %s: %s", candidate, verr)
			continue
		}
		if cbErr := f(ID(candidate)); cbErr != nil {
			return cbErr
		}
	}
	return nil
}

Get()

Get()可以通过id获取镜像的config文件。获取config文件的流程很简单,从content目录下读取对应id的文件即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Get returns the content stored under a given ID.
// It holds the read lock for the duration of the lookup and delegates the
// actual read and verification to get.
func (s *fs) Get(id ID) ([]byte, error) {
	s.RLock()
	defer s.RUnlock()

	content, err := s.get(id)
	return content, err
}
// get reads the config file for id from the content directory and returns its
// bytes. The digest of the bytes read must equal id; otherwise an error is
// returned instead of the content. The caller is responsible for locking.
func (s *fs) get(id ID) ([]byte, error) {
	file := s.contentFile(id)
	raw, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}

	// todo: maybe optional
	if computed := ID(digest.FromBytes(raw)); computed != id {
		return nil, fmt.Errorf("failed to verify image: %v", id)
	}
	return raw, nil
}

Set()

Set()可以设置镜像层的config文件。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Set stores content under a given ID.
// The ID is the digest of data, so identical data always maps to the same
// file. Empty data is rejected. The file is written with
// ioutils.AtomicWriteFile and mode 0600 while the write lock is held.
func (s *fs) Set(data []byte) (ID, error) {
	s.Lock()
	defer s.Unlock()

	if len(data) == 0 {
		return "", fmt.Errorf("Invalid empty data")
	}

	id := ID(digest.FromBytes(data))
	target := s.contentFile(id)
	err := ioutils.AtomicWriteFile(target, data, 0600)
	if err != nil {
		return "", err
	}
	return id, nil
}

Delete()

Delete()可以依据id删除镜像层对应的文件或目录。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Delete removes content and metadata files associated with the ID.
// The metadata directory is removed first (recursively), then the content
// file. The first failure is returned and aborts the remaining step.
func (s *fs) Delete(id ID) error {
	s.Lock()
	defer s.Unlock()

	metaDir, contentPath := s.metadataDir(id), s.contentFile(id)
	if err := os.RemoveAll(metaDir); err != nil {
		return err
	}
	return os.Remove(contentPath)
}

SetMetadata()

SetMetadata()可以把内容写入到metadata目录下。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// SetMetadata sets metadata for a given ID. It fails if there's no base file.
// The base file is checked by reading (and verifying) the content for id.
// The per-ID metadata directory is created on demand with mode 0700 and the
// value is written to <metadataDir>/<key> with mode 0600.
func (s *fs) SetMetadata(id ID, key string, data []byte) error {
	s.Lock()
	defer s.Unlock()

	// Refuse to attach metadata to an ID that has no valid content.
	_, err := s.get(id)
	if err != nil {
		return err
	}

	dir := filepath.Join(s.metadataDir(id))
	if mkErr := os.MkdirAll(dir, 0700); mkErr != nil {
		return mkErr
	}

	target := filepath.Join(s.metadataDir(id), key)
	return ioutils.AtomicWriteFile(target, data, 0600)
}

GetMetadata()

从metadata目录下获取key对应的内容。

1
2
3
4
5
6
7
8
9
10
11
// GetMetadata returns metadata for a given ID.
// The content for id must exist and verify; only then is the file
// <metadataDir>/<key> read and returned.
func (s *fs) GetMetadata(id ID, key string) ([]byte, error) {
	s.RLock()
	defer s.RUnlock()

	_, err := s.get(id)
	if err != nil {
		return nil, err
	}

	file := filepath.Join(s.metadataDir(id), key)
	return ioutil.ReadFile(file)
}

DeleteMetadata()

DeleteMetadata()移除某id的metadata内容。

1
2
3
4
5
6
7
8
// DeleteMetadata removes the metadata associated with an ID.
// Only the entry named key inside the ID's metadata directory is removed;
// the removal is done with os.RemoveAll under the write lock.
func (s *fs) DeleteMetadata(id ID, key string) error {
	s.Lock()
	defer s.Unlock()

	target := filepath.Join(s.metadataDir(id), key)
	return os.RemoveAll(target)
}

image

接着来看image,image定义在/image/image.go中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// V1Image stores the V1 image configuration.
// Each field maps to the JSON key named in its struct tag; fields tagged
// omitempty are left out of the encoded JSON when they hold an empty value.
type V1Image struct {
// ID a unique 64 character identifier of the image
ID string `json:"id,omitempty"`
// Parent id of the image
Parent string `json:"parent,omitempty"`
// Comment user added comment
Comment string `json:"comment,omitempty"`
// Created timestamp when image was created
Created time.Time `json:"created"`
// Container is the id of the container used to commit
Container string `json:"container,omitempty"`
// ContainerConfig is the configuration of the container that is committed into the image
ContainerConfig container.Config `json:"container_config,omitempty"`
// DockerVersion specifies version on which image is built
DockerVersion string `json:"docker_version,omitempty"`
// Author of the image
Author string `json:"author,omitempty"`
// Config is the configuration of the container received from the client
Config *container.Config `json:"config,omitempty"`
// Architecture is the hardware that the image is build and runs on
Architecture string `json:"architecture,omitempty"`
// OS is the operating system used to build and run the image
OS string `json:"os,omitempty"`
// Size is the total size of the image including all layers it is composed of.
// The tag has no name, so the JSON key is the field name "Size".
Size int64 `json:",omitempty"`
}
// Image stores the image configuration.
// It embeds V1Image and adds the fields below.
type Image struct {
V1Image
// Parent shares the "parent" JSON key with V1Image.Parent; being declared
// on the outer struct it shadows the embedded field.
Parent ID `json:"parent,omitempty"`
// RootFS describes the image's root filesystem.
RootFS *RootFS `json:"rootfs,omitempty"`
// History is encoded under "history"; the History type is declared elsewhere.
History []History `json:"history,omitempty"`
OSVersion string `json:"os.version,omitempty"`
OSFeatures []string `json:"os.features,omitempty"`
// rawJSON caches the immutable JSON associated with this image.
rawJSON []byte
// computedID is the ID computed from the hash of the image config.
// Not to be confused with the legacy V1 ID in V1Image.
computedID ID
}

image的各字段可以和image的config对应起来。

RawJSON()

RawJSON()返回rawJSON。

1
2
3
4
// RawJSON returns the immutable JSON associated with the image, i.e. the
// bytes cached in the rawJSON field. The slice is returned without copying.
func (img *Image) RawJSON() []byte {
	raw := img.rawJSON
	return raw
}

ID()

1
2
3
4
5
// ID returns the image's computed ID, the value stored in computedID
// (the ID computed from the hash of the image config).
func (img *Image) ID() ID {
	computed := img.computedID
	return computed
}

ImageID()

返回镜像层的ID。

1
2
3
4
// ImageID stringizes ID: it returns the result of ID() converted to a string.
func (img *Image) ImageID() string {
	id := img.ID()
	return string(id)
}

RunConfig()

1
2
3
4
5
// RunConfig returns the image's container config, i.e. the Config pointer
// promoted from the embedded V1Image. The pointer is returned as stored.
func (img *Image) RunConfig() *container.Config {
	cfg := img.Config
	return cfg
}

MarshalJSON()

MarshalJSON()把镜像层的config序列化成json。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// MarshalJSON serializes the image to JSON. It sorts the top-level keys so
// that JSON that's been manipulated by a push/pull cycle with a legacy
// registry won't end up with a different key order.
func (img *Image) MarshalJSON() ([]byte, error) {
	// plainImage has Image's fields but none of its methods, so encoding it
	// does not recurse back into this MarshalJSON.
	type plainImage Image

	encoded, err := json.Marshal(plainImage(*img))
	if err != nil {
		return nil, err
	}

	// Round-trip through a map: encoding/json writes map keys in sorted
	// order, while RawMessage leaves every value's bytes untouched.
	fields := make(map[string]*json.RawMessage)
	if err = json.Unmarshal(encoded, &fields); err != nil {
		return nil, err
	}
	return json.Marshal(fields)
}

rootfs

rootfs表明Docker是如何组织镜像的,目前只有“layers”一种。rootfs定义在/image/rootfs_unix.go中:

1
2
3
4
5
6
7
8
9
10
11
12
// RootFS describes images root filesystem
// This is currently a placeholder that only supports layers. In the future
// this can be made into an interface that supports different implementations.
type RootFS struct {
// Type is encoded under the "type" JSON key.
Type string `json:"type"`
// DiffIDs is the list of layer diff IDs; ChainID derives the chain ID from it.
DiffIDs []layer.DiffID `json:"diff_ids,omitempty"`
}
// ChainID returns the ChainID for the top layer in RootFS, obtained by
// passing the DiffIDs list to layer.CreateChainID.
func (r *RootFS) ChainID() layer.ChainID {
	diffIDs := r.DiffIDs
	return layer.CreateChainID(diffIDs)
}

可以看出,RootFS可以依据DiffIDs(由layer.tar哈希计算得出)计算出最上层layer的ChainID。具体见镜像存储分析。

Store

Store可以操作整个imagedb。
Store定义在/image/store.go中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Store is an interface for creating and accessing images
type Store interface {
// Create stores config and returns the resulting image ID.
Create(config []byte) (ID, error)
// Get returns the image decoded from the config stored under id.
Get(id ID) (*Image, error)
// Delete removes the image and returns the metadata reported when its
// layer reference is released.
Delete(id ID) ([]layer.Metadata, error)
// Search: implementation not shown in this file section; presumably it
// resolves a partial ID to a full one — TODO confirm.
Search(partialID string) (ID, error)
// SetParent records parent as the parent of id.
SetParent(id ID, parent ID) error
// GetParent returns the recorded parent of id.
GetParent(id ID) (ID, error)
// Children returns the IDs recorded as children of id.
Children(id ID) []ID
// Map returns all images keyed by ID.
Map() map[ID]*Image
// Heads: implementation not shown in this file section; presumably the
// images that have no children — TODO confirm.
Heads() map[ID]*Image
}
// store is the Store implementation built by NewImageStore.
// The embedded Mutex is taken by the methods that read or modify images.
type store struct {
sync.Mutex
// ls is used to get and release the layers referenced by images.
ls LayerGetReleaser
// images maps each known image ID to its in-memory bookkeeping.
images map[ID]*imageMeta
// fs persists image configs and per-image metadata.
fs StoreBackend
// digestSet holds the digest of every image added by restore and Create;
// Delete removes the digest again.
digestSet *digest.Set
}

可以看出,store中有images map维护ID和imageMeta的关系。
imageMeta定义如下:

1
2
3
4
5
// imageMeta is the in-memory record the store keeps for each image.
type imageMeta struct {
// layer is the layer obtained from the layer store for the image's chain
// ID; it stays nil when the chain ID is empty.
layer layer.Layer
// children is used as a set: each key is the ID of a child image.
children map[ID]struct{}
}

imageMeta中存有layer信息及该镜像层的children信息。

NewImageStore()

来看下store的生成函数NewImageStore()。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// NewImageStore returns new store object for given layer store.
// The store starts with an empty image map and digest set and is then
// populated from the backend by restore; a restore error aborts construction.
func NewImageStore(fs StoreBackend, ls LayerGetReleaser) (Store, error) {
	is := new(store)
	is.ls = ls
	is.fs = fs
	is.images = make(map[ID]*imageMeta)
	is.digestSet = digest.NewSet()

	// load all current images and retain layers
	if err := is.restore(); err != nil {
		return nil, err
	}
	return is, nil
}
// restore rebuilds the in-memory state from the backend in two passes.
// Pass one walks every stored ID, loads the image, gets its top layer from
// the layer store when the chain ID is non-empty, and registers the image.
// Images that fail to load are logged and skipped; layer-store and digest-set
// errors abort the restore. Pass two links each image into the children set
// of its parent when that parent is known.
func (is *store) restore() error {
	load := func(id ID) error {
		// Resolve the config for id into an Image.
		img, err := is.Get(id)
		if err != nil {
			logrus.Errorf("invalid image %v, %v", id, err)
			return nil
		}

		var top layer.Layer
		if chainID := img.RootFS.ChainID(); chainID != "" {
			if top, err = is.ls.Get(chainID); err != nil {
				return err
			}
		}

		if addErr := is.digestSet.Add(digest.Digest(id)); addErr != nil {
			return addErr
		}

		is.images[id] = &imageMeta{
			layer:    top,
			children: make(map[ID]struct{}),
		}
		return nil
	}
	if err := is.fs.Walk(load); err != nil {
		return err
	}

	// Second pass to fill in children maps
	for id := range is.images {
		parent, err := is.GetParent(id)
		if err != nil {
			continue
		}
		if parentMeta := is.images[parent]; parentMeta != nil {
			parentMeta.children[id] = struct{}{}
		}
	}
	return nil
}

其中restore()是直接从imagedb目录构建imageStore。

Create()

Create()可以从config生成一个镜像层。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// Create validates config, persists it through the backend and registers the
// resulting image in the store.
//
// config must be image JSON with a "rootfs" section. A config without one is
// rejected with an error; previously the nil RootFS was dereferenced and
// caused a panic. The History section may not list more non-empty layers
// than RootFS has diff IDs.
//
// The returned ID is the one the backend derives from config. If an image
// with that ID is already registered, Create returns the ID without touching
// the layer store again. Otherwise the top layer is fetched from the layer
// store (when the chain ID is non-empty) and the image is added to the
// in-memory map and the digest set.
func (is *store) Create(config []byte) (ID, error) {
	var img Image
	if err := json.Unmarshal(config, &img); err != nil {
		return "", err
	}
	if img.RootFS == nil {
		return "", errors.New("invalid image JSON, no RootFS key")
	}

	// Must reject any config whose history claims more non-empty layers
	// than the rootfs actually provides.
	layerCounter := 0
	for _, h := range img.History {
		if !h.EmptyLayer {
			layerCounter++
		}
	}
	if layerCounter > len(img.RootFS.DiffIDs) {
		return "", errors.New("too many non-empty layers in History section")
	}

	// Store the config; the backend derives the image ID from it.
	imageID, err := is.fs.Set(config)
	if err != nil {
		return "", err
	}

	is.Lock()
	defer is.Unlock()

	if _, exists := is.images[imageID]; exists {
		return imageID, nil
	}

	var l layer.Layer
	if layerID := img.RootFS.ChainID(); layerID != "" {
		l, err = is.ls.Get(layerID)
		if err != nil {
			return "", err
		}
	}

	is.images[imageID] = &imageMeta{
		layer:    l,
		children: make(map[ID]struct{}),
	}
	if err := is.digestSet.Add(digest.Digest(imageID)); err != nil {
		// Keep the map and the digest set in step.
		delete(is.images, imageID)
		return "", err
	}
	return imageID, nil
}

Get()

Get()先从fs中获取config,然后调用NewFromJSON(config)创建一个img,再设置img的computedID(就是该image的config的hash值)及Parent。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// Get loads the config stored under id from the backend, decodes it with
// NewFromJSON, and returns the resulting image with its computed ID set to id.
// The Parent field is filled from the stored parent metadata; when that
// lookup fails Parent is left empty and no error is reported.
func (is *store) Get(id ID) (*Image, error) {
	// todo: Check if image is in images
	// todo: Detect manual insertions and start using them
	raw, err := is.fs.Get(id)
	if err != nil {
		return nil, err
	}

	img, err := NewFromJSON(raw)
	if err != nil {
		return nil, err
	}
	img.computedID = id

	if parent, perr := is.GetParent(id); perr == nil {
		img.Parent = parent
	} else {
		img.Parent = ""
	}
	return img, nil
}

Delete()

Delete()可以删除一个镜像层。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Delete removes the image id from the store.
//
// It detaches the image from its children (by deleting their "parent"
// metadata) and from its parent's children set, drops it from the digest set
// and the in-memory map, deletes it from the backend, and finally releases
// the image's layer reference, returning whatever the layer store reports.
//
// An unknown id yields an error. Backend and digest-set failures during the
// cleanup are logged rather than returned, so the removal stays best-effort
// and always proceeds to release the layer.
func (is *store) Delete(id ID) ([]layer.Metadata, error) {
	is.Lock()
	defer is.Unlock()

	imageMeta := is.images[id]
	if imageMeta == nil {
		return nil, fmt.Errorf("unrecognized image ID %s", id.String())
	}

	// Children no longer have this image as their parent.
	for child := range imageMeta.children {
		if err := is.fs.DeleteMetadata(child, "parent"); err != nil {
			logrus.Errorf("error removing parent metadata of %s: %q", child, err)
		}
	}
	if parent, err := is.GetParent(id); err == nil && is.images[parent] != nil {
		delete(is.images[parent].children, id)
	}

	if err := is.digestSet.Remove(digest.Digest(id)); err != nil {
		logrus.Errorf("error removing %s from digest set: %q", id, err)
	}
	delete(is.images, id)
	if err := is.fs.Delete(id); err != nil {
		logrus.Errorf("error removing %s from backend: %q", id, err)
	}

	if imageMeta.layer != nil {
		return is.ls.Release(imageMeta.layer)
	}
	return nil, nil
}

SetParent()

SetParent()可以建立两个id之间的父子层关系。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// SetParent records parent as the parent image of id.
//
// parent must be a registered image, otherwise an error is returned. The
// relation is first persisted as the "parent" metadata of id. Only when that
// write succeeds are the in-memory children sets updated: id is removed from
// its previous parent's set (if any) and added to the new parent's. A failed
// write therefore leaves the in-memory state untouched.
func (is *store) SetParent(id, parent ID) error {
	is.Lock()
	defer is.Unlock()

	parentMeta := is.images[parent]
	if parentMeta == nil {
		return fmt.Errorf("unknown parent image ID %s", parent.String())
	}

	// Read the previous parent before the metadata file is overwritten.
	oldParent, oldErr := is.GetParent(id)

	if err := is.fs.SetMetadata(id, "parent", []byte(parent)); err != nil {
		return err
	}

	if oldErr == nil && is.images[oldParent] != nil {
		delete(is.images[oldParent].children, id)
	}
	parentMeta.children[id] = struct{}{}
	return nil
}

GetParent()

GetParent()可以获取镜像层的父层。

1
2
3
4
5
6
7
8
9
// GetParent returns the parent of image id, read from the "parent" metadata
// entry in the backend. The stored bytes are converted to an ID as-is; a
// failed read is returned as the error together with an empty ID.
func (is *store) GetParent(id ID) (ID, error) {
	raw, err := is.fs.GetMetadata(id, "parent")
	if err == nil {
		return ID(raw), nil // todo: validate?
	}
	return "", err
}

Children()

Children()可以获取镜像层的所有子层。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Children returns the IDs of the images recorded as children of id.
// It takes the store lock and delegates to children.
func (is *store) Children(id ID) []ID {
	is.Lock()
	defer is.Unlock()

	kids := is.children(id)
	return kids
}
// children collects the keys of the children set of image id. It returns a
// nil slice when id is unknown or has no children. The caller must hold the
// store lock.
func (is *store) children(id ID) []ID {
	meta := is.images[id]
	if meta == nil {
		return nil
	}

	var kids []ID
	for child := range meta.children {
		kids = append(kids, child)
	}
	return kids
}

Map()

Map()返回所有的镜像层。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Map returns every image in the store keyed by ID; it is imagesMap with
// the "all" flag set.
func (is *store) Map() map[ID]*Image {
	const includeAll = true
	return is.imagesMap(includeAll)
}
// imagesMap loads the registered images into a map keyed by ID while holding
// the store lock. With all set every image is included; otherwise images
// that have at least one child are left out. Images that fail to load are
// logged and omitted from the result.
func (is *store) imagesMap(all bool) map[ID]*Image {
	is.Lock()
	defer is.Unlock()

	result := make(map[ID]*Image)
	for id := range is.images {
		wanted := all || len(is.children(id)) == 0
		if !wanted {
			continue
		}

		img, err := is.Get(id)
		if err != nil {
			logrus.Errorf("invalid image access: %q, error: %q", id, err)
			continue
		}
		result[id] = img
	}
	return result
}