本次分析介绍Docker是如何初始化容器网络的。本次将分析libnetwork以外的内容,主要包含两个方面:

  1. Daemon如何调用libnetwork初始化网络;
  2. runc如何调用libnetwork-setkey。

这两点也是libnetwork的两个入口。

Daemon部分

首先,容器网络初始化发生在容器启动过程,来看Daemon的Start()方法,定义在/daemon/start.go中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Start starts a container. It delegates directly to containerStart and
// returns whatever error that call returns.
func (daemon *Daemon) Start(container *container.Container) error {
return daemon.containerStart(container)
}
// containerStart is shown only in part; the "......" lines mark code elided
// by the article. The visible portion initializes the container's networking
// and returns the error if that step fails.
func (daemon *Daemon) containerStart(container *container.Container) (err error) {
......
// Initialize the container's networking.
// initializeNetworking is defined in /daemon/container_operations.go.
if err := daemon.initializeNetworking(container); err != nil {
return err
}
......
}

Start()方法调用containerStart()方法。在containerStart()中,调用initializeNetworking(container)初始化容器网络。

再来看initializeNetworking(),定义在/daemon/container_operations.go中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// initializeNetworking prepares the network-related configuration of a
// container according to its network mode.
//
// In container mode it copies the hostname/hosts/resolv.conf paths and the
// hostname/domainname from the container being joined, then returns without
// calling allocateNetwork. In host mode it sets the container's hostname from
// os.Hostname(). Every mode other than container mode then calls
// allocateNetwork and finally returns the result of
// container.BuildHostnameFile().
func (daemon *Daemon) initializeNetworking(container *container.Container) error {
var err error
// Container network mode: reuse the settings of the container being joined.
if container.HostConfig.NetworkMode.IsContainer() {
// we need to get the hosts files from the container to join
// Note: err below is a new variable scoped to this if block.
nc, err := daemon.getNetworkedContainer(container.ID, container.HostConfig.NetworkMode.ConnectedContainer())
if err != nil {
return err
}
container.HostnamePath = nc.HostnamePath
container.HostsPath = nc.HostsPath
container.ResolvConfPath = nc.ResolvConfPath
container.Config.Hostname = nc.Config.Hostname
container.Config.Domainname = nc.Config.Domainname
// Container mode returns here; allocateNetwork is not called.
return nil
}
// Host mode: the container uses the hostname reported by os.Hostname().
if container.HostConfig.NetworkMode.IsHost() {
container.Config.Hostname, err = os.Hostname()
if err != nil {
return err
}
}
// Reached by every mode except container mode (e.g. host mode, bridge mode).
if err := daemon.allocateNetwork(container); err != nil {
return err
}
return container.BuildHostnameFile()
}

可以看到,initializeNetworking()会根据容器的网络模式进行不同的初始化处理。其中container模式会直接复用被加入容器的hostname、hosts等配置并提前返回,其他模式会调用allocateNetwork()。

allocateNetwork()定义如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// allocateNetwork connects the container to every network listed in
// container.NetworkSettings.Networks, connecting the default network first.
//
// It returns nil immediately when the daemon has no network controller, or
// when no networks are recorded and networking is disabled or the container
// is in container network mode. On success it returns the result of
// container.WriteHostConfig().
func (daemon *Daemon) allocateNetwork(container *container.Container) error {
// daemon.netController is a libnetwork.NetworkController.
controller := daemon.netController
if daemon.netController == nil {
return nil
}
// Cleanup any stale sandbox left over due to ungraceful daemon shutdown
// A failure here is only logged; allocation continues.
if err := controller.SandboxDestroy(container.ID); err != nil {
logrus.Errorf("failed to cleanup up stale network sandbox for container %s", container.ID)
}
updateSettings := false
// No networks recorded yet: populate the settings, unless networking is
// disabled or the container is in container network mode.
if len(container.NetworkSettings.Networks) == 0 {
if container.Config.NetworkDisabled || container.HostConfig.NetworkMode.IsContainer() {
return nil
}
err := daemon.updateContainerNetworkSettings(container, nil)
if err != nil {
return err
}
updateSettings = true
}
// always connect default network first since only default
// network mode support link and we need do some setting
// on sandbox initialize for link, but the sandbox only be initialized
// on first network connecting.
// Example value of container.NetworkSettings.Networks observed by the
// article's author: map[bridge:0xc420081200]
// Example value of defaultNetName observed by the article's author: bridge
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
if err := daemon.connectToNetwork(container, defaultNetName, nConf, updateSettings); err != nil {
return err
}
}
// Connect the remaining networks; the default one was handled above.
for n, nConf := range container.NetworkSettings.Networks {
if n == defaultNetName {
continue
}
if err := daemon.connectToNetwork(container, n, nConf, updateSettings); err != nil {
return err
}
}
return container.WriteHostConfig()
}

如果创建容器时未指定网络,或网络为“bridge”,则执行:

1
2
3
4
5
6
7
// Connect the default network first, if the container's network settings
// contain an entry for it.
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
if err := daemon.connectToNetwork(container, defaultNetName, nConf, updateSettings); err != nil {
return err
}
}

随后(无论默认网络是否存在)遍历其余网络,默认网络在循环中被跳过:

1
2
3
4
5
6
7
8
// Connect every network in the container's settings except the default one,
// which is skipped here.
for n, nConf := range container.NetworkSettings.Networks {
if n == defaultNetName {
continue
}
if err := daemon.connectToNetwork(container, n, nConf, updateSettings); err != nil {
return err
}
}

此处n的值可能为“test”(bridge网络名)、“none”、“host”。

接着往下看connectToNetwork():

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// connectToNetwork creates an endpoint for the container on the network
// identified by idOrName, creates the sandbox if the container does not have
// one yet, and joins the endpoint to the sandbox.
//
// Values of idOrName per network mode, as noted by the article's author:
//   - host mode: "host"
//   - bridge mode: "bridge" or "test"
//   - none mode: "none"
//
// A nil endpointConfig is replaced by an empty EndpointSettings. If a step
// after endpoint creation fails, the deferred function deletes the endpoint.
func (daemon *Daemon) connectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings, updateSettings bool) (err error) {
if endpointConfig == nil {
endpointConfig = &networktypes.EndpointSettings{}
}
n, err := daemon.updateNetworkConfig(container, idOrName, endpointConfig, updateSettings)
if err != nil {
return err
}
// updateNetworkConfig returned neither a network nor an error: nothing to connect.
if n == nil {
return nil
}
controller := daemon.netController
sb := daemon.getNetworkSandbox(container)
createOptions, err := container.BuildCreateEndpointOptions(n, endpointConfig, sb)
if err != nil {
return err
}
// Create the endpoint, named after the container with the leading "/" removed.
endpointName := strings.TrimPrefix(container.Name, "/")
ep, err := n.CreateEndpoint(endpointName, createOptions...)
if err != nil {
return err
}
// Roll back the endpoint if any later step fails. err is the function's
// named result, so it reflects the value of whichever return follows.
defer func() {
if err != nil {
if e := ep.Delete(false); e != nil {
logrus.Warnf("Could not rollback container connection to network %s", idOrName)
}
}
}()
container.NetworkSettings.Networks[n.Name()] = endpointConfig
if err := daemon.updateEndpointNetworkSettings(container, n, ep); err != nil {
return err
}
// The container has no sandbox yet: build its options and create one.
if sb == nil {
options, err := daemon.buildSandboxOptions(container)
if err != nil {
return err
}
// Create the sandbox.
sb, err = controller.NewSandbox(container.ID, options...)
if err != nil {
return err
}
container.UpdateSandboxNetworkSettings(sb)
}
joinOptions, err := container.BuildJoinOptions(n)
if err != nil {
return err
}
// Join the endpoint to the sandbox by calling the endpoint's Join().
if err := ep.Join(sb, joinOptions...); err != nil {
return err
}
if err := container.UpdateJoinInfo(n, ep); err != nil {
return fmt.Errorf("Updating join info failed: %v", err)
}
container.NetworkSettings.Ports = getPortMapInfo(sb)
daemon.LogNetworkEventWithAttributes(n, "connect", map[string]string{"container": container.ID})
return nil
}

connectToNetwork()的流程如下:

  1. 调用n.CreateEndpoint()创建endpoint;
  2. 调用controller.NewSandbox()生成sandbox;
  3. 调用endpoint.Join()把endpoint加入到sandbox中。

关于controller,network,endpoint这些概念会在libnetwork中进行分析。这里只需知道按libnetwork的用法用即可,下面就是官方的例子:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Example usage of libnetwork: create a controller, a network, an endpoint
// and a sandbox, then join the endpoint to the sandbox.
networkType := "bridge"
// Create a new controller instance
driverOptions := options.Generic{}
genericOption := make(map[string]interface{})
genericOption[netlabel.GenericData] = driverOptions
controller, err := libnetwork.New(config.OptionDriverConfig(networkType, genericOption))
if err != nil {
return
}
// Create a network for containers to join.
// NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can make use of
network, err := controller.NewNetwork(networkType, "network1", "")
if err != nil {
return
}
// For each new container: allocate IP and interfaces. The returned network
// settings will be used for container infos (inspect and such), as well as
// iptables rules for port publishing. This info is contained or accessible
// from the returned endpoint.
ep, err := network.CreateEndpoint("Endpoint1")
if err != nil {
return
}
// Create the sandbox for the container.
// NewSandbox accepts Variadic optional arguments which libnetwork can use.
// NOTE(review): the err returned by NewSandbox is not checked in this
// excerpt; it is overwritten by the ep.Join call below.
sbx, err := controller.NewSandbox("container1",
libnetwork.OptionHostname("test"),
libnetwork.OptionDomainname("docker.io"))
// A sandbox can join the endpoint via the join api.
err = ep.Join(sbx)
if err != nil {
return
}

这里要说明一下,none和bridge模式下net namespace的设置是通过libnetwork向docker reexec注册的“libnetwork-setkey”子命令完成的,在NewSandbox()时并不会生成net namespace。

NewSandbox()有不同的表现,都是因为有options,options由buildSandboxOptions()创建,buildSandboxOptions()也会在libnetwork中分析。

libnetwork-setkey

现在来看下libnetwork-setkey是怎么回事。
在使用libnetwork-setkey来设置namespace时,Docker生成容器的config.json中定义有:

1
2
3
4
5
6
7
8
9
10
11
12
"hooks": {
"prestart": [
{
"path": "/usr/local/bin/dockerd",
"args": [
"libnetwork-setkey",
"12a3c752fc752f0a9099bea56525a106092f78f411704e47eaf9c32520550025",
"9ca45d808e4c577ae6dceef7259ca41561cf1e990a1ae41d370c5df458aab3f5"
]
}
]
}

而在/runc/libcontainer/process_linux.go的start()中,重点是与子进程通信的loop循环里case procReady分支中的这段代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// call prestart hooks
// The hooks run at this point only when the configured namespaces do not
// contain configs.NEWNS.
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
if p.config.Config.Hooks != nil {
// State handed to every hook: version, container ID, process pid and rootfs.
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
// Run the hooks.prestart entries from the config in order; the first
// failure aborts with an error that records the hook's index.
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
}

在这段代码中,会执行config.json文件中定义的prestart hooks。而libnetwork-setkey子命令就是把外边创建好的net namespace传给libnetwork使用。关于libnetwork-setkey具体实现,将在libnetwork中详细分析。