libnetwork源码分析(一)-controller(3)-v1-12-3

本文接着上一篇的分析，介绍controller对sandbox的管理，并分析sandbox的相关代码。

(三) 管理sandbox

在controller中，有sandboxes字段存储sandbox。

controller中相关函数

controller::NewSandbox()

NewSandbox()可以创建一个新的sandbox。

// NewSandbox creates the sandbox for the container identified by containerID,
// registers it in the controller's sandboxes map and persists it through
// storeUpdate.
//
// An empty containerID yields a BadRequest error. If a non-stub sandbox for
// the same container is already registered, a Forbidden error is returned; a
// stub sandbox for the same container is reused with its isStub flag cleared.
// Requesting an ingress sandbox while the controller already has one also
// yields a Forbidden error.
//
// The named result err is inspected by the deferred cleanup functions, so
// every failure after those defers are installed undoes the corresponding
// controller bookkeeping.
func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (sBox Sandbox, err error) {
	if containerID == "" {
		return nil, types.BadRequestErrorf("invalid container ID")
	}
	var sb *sandbox
	c.Lock()
	// Look for an already registered sandbox that belongs to this container.
	for _, s := range c.sandboxes {
		if s.containerID == containerID {
			// If not a stub, then we already have a complete sandbox.
			if !s.isStub {
				sbID := s.ID()
				c.Unlock()
				return nil, types.ForbiddenErrorf("container %s is already present in sandbox %s", containerID, sbID)
			}
			// We already have a stub sandbox from the
			// store. Make use of it so that we don't lose
			// the endpoints from store but reset the
			// isStub flag.
			sb = s
			sb.isStub = false
			break
		}
	}
	c.Unlock()
	// Create sandbox and process options first. Key generation depends on an option
	// Build a fresh sandbox unless a stub is being reused.
	if sb == nil {
		sb = &sandbox{
			id:                 stringid.GenerateRandomID(),
			containerID:        containerID,
			endpoints:          epHeap{},
			epPriority:         map[string]int{},
			populatedEndpoints: map[string]struct{}{},
			config:             containerConfig{},
			controller:         c,
		}
	}
	sBox = sb
	heap.Init(&sb.endpoints)
	sb.processOptions(options...)
	c.Lock()
	if sb.ingress && c.ingressSandbox != nil {
		c.Unlock()
		return nil, types.ForbiddenErrorf("ingress sandbox already present")
	}
	// The ingress sandbox is tracked separately and gets a fixed id.
	if sb.ingress {
		c.ingressSandbox = sb
		sb.id = "ingress_sbox"
	}
	c.Unlock()
	// On failure, release the ingress slot claimed above.
	defer func() {
		if err != nil {
			c.Lock()
			if sb.ingress {
				c.ingressSandbox = nil
			}
			c.Unlock()
		}
	}()
	if err = sb.setupResolutionFiles(); err != nil {
		return nil, err
	}
	// Sandboxes flagged useDefaultSandBox share the controller's default OS
	// sandbox, which is created at most once through sboxOnce.
	// NOTE(review): the accompanying notes say host mode sets this flag — confirm.
	if sb.config.useDefaultSandBox {
		c.sboxOnce.Do(func() {
			c.defOsSbox, err = osl.NewSandbox(sb.Key(), false, false)
		})
		if err != nil {
			// Replace the Once so that a later call runs the creation again.
			c.sboxOnce = sync.Once{}
			return nil, fmt.Errorf("failed to create default sandbox: %v", err)
		}
		sb.osSbox = c.defOsSbox
	}
	// Create a dedicated OS sandbox only when none was assigned above and the
	// key is not supplied externally.
	// NOTE(review): the accompanying notes say bridge and none modes set
	// useExternalKey — confirm.
	if sb.osSbox == nil && !sb.config.useExternalKey {
		if sb.osSbox, err = osl.NewSandbox(sb.Key(), !sb.config.useDefaultSandBox, false); err != nil {
			return nil, fmt.Errorf("failed to create new osl sandbox: %v", err)
		}
	}
	c.Lock()
	// Register the sandbox with the controller.
	c.sandboxes[sb.id] = sb
	c.Unlock()
	// On failure, unregister the sandbox again.
	defer func() {
		if err != nil {
			c.Lock()
			delete(c.sandboxes, sb.id)
			c.Unlock()
		}
	}()
	// Persist the sandbox state.
	err = sb.storeUpdate()
	if err != nil {
		return nil, fmt.Errorf("updating the store state of sandbox failed: %v", err)
	}
	return sb, nil
}

NewSandbox()的流程如下：

冲突检查：检查是否已存在相同containerID的sandbox（若已存在的是stub sandbox，则复用它）；
构造sandbox结构体值；
初始化sandbox的endpoints字段；
处理useDefaultSandBox情况，生成osSbox，并赋值给sb.osSbox，host模式走此分支；
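处理osSbox尚未生成且!useExternalKey的情况，调用osl.NewSandbox()生成osSbox；与useDefaultSandBox分支不同的是，此处传给osl.NewSandbox()的第二个参数为!sb.config.useDefaultSandBox（该分支下为true），而useDefaultSandBox分支传入的是false；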
把sandbox放入controller的sandboxes中；
调用sb.storeUpdate()存储sandbox。

controller::Sandboxes()

Sandboxes()可以获取controller的sandboxes字段中的sandbox。

// Sandboxes returns the sandboxes currently registered with the controller.
// Stub sandboxes are left out of the result. The controller lock is held
// while the list is built.
func (c *controller) Sandboxes() []Sandbox {
	c.Lock()
	defer c.Unlock()

	visible := make([]Sandbox, 0, len(c.sandboxes))
	for _, sb := range c.sandboxes {
		// Stubs are hidden from libnetwork users.
		if !sb.isStub {
			visible = append(visible, sb)
		}
	}
	return visible
}

controller::WalkSandboxes()

WalkSandboxes()对每一个sandbox做操作，直到walker()返回true。

// WalkSandboxes calls walker on each sandbox returned by Sandboxes and stops
// as soon as walker returns true.
func (c *controller) WalkSandboxes(walker SandboxWalker) {
	all := c.Sandboxes()
	for i := range all {
		if stop := walker(all[i]); stop {
			break
		}
	}
}

controller::SandboxByID()

SandboxByID()依据id获取sandbox。

// SandboxByID looks up a sandbox by its sandbox id (the key of the
// controller's sandboxes map). It returns ErrInvalidID for an empty id and a
// NotFound error when no sandbox is registered under id.
func (c *controller) SandboxByID(id string) (Sandbox, error) {
	if len(id) == 0 {
		return nil, ErrInvalidID(id)
	}

	c.Lock()
	sb, found := c.sandboxes[id]
	c.Unlock()

	if found {
		return sb, nil
	}
	return nil, types.NotFoundErrorf("sandbox %s not found", id)
}

controller::SandboxDestroy()

SandboxDestroy()依据containerID（即参数id）查找并删除对应的sandbox；找不到对应的sandbox时直接返回nil，不视为错误。

// SandboxDestroy deletes the sandbox whose containerID equals id by calling
// its Delete method. Note that id is compared against the container ID, not
// the sandbox id. It returns nil when no such sandbox is registered.
func (c *controller) SandboxDestroy(id string) error {
	c.Lock()
	var target *sandbox
	for _, candidate := range c.sandboxes {
		if candidate.containerID != id {
			continue
		}
		target = candidate
		break
	}
	c.Unlock()

	if target != nil {
		return target.Delete()
	}
	// A missing sandbox is not treated as an error.
	return nil
}

controller::SandboxContainerWalker()

SandboxContainerWalker()可以返回指定containerID的walker，该walker可以找到与containerID匹配的sandbox，并把sandbox赋给out。

// SandboxContainerWalker returns a SandboxWalker that stores the first
// sandbox whose ContainerID equals containerID into *out and returns true for
// it; for every other sandbox it returns false and leaves *out untouched.
func SandboxContainerWalker(out *Sandbox, containerID string) SandboxWalker {
	return func(sb Sandbox) bool {
		if sb.ContainerID() != containerID {
			return false
		}
		*out = sb
		return true
	}
}

controller::SandboxKeyWalker()

与SandboxContainerWalker()相同，只是比较的是存储Key。

// SandboxKeyWalker returns a SandboxWalker that stores the first sandbox
// whose Key equals key into *out and returns true for it; for every other
// sandbox it returns false and leaves *out untouched.
func SandboxKeyWalker(out *Sandbox, key string) SandboxWalker {
	return func(sb Sandbox) bool {
		if sb.Key() != key {
			return false
		}
		*out = sb
		return true
	}
}

Sandbox

再来看下sandbox的定义，sandbox结构体定义在/libnetwork/sandbox.go中，表示一个net namespace：

// sandbox is the controller-side record of a container's network sandbox.
// The embedded mutex is taken around accesses to fields such as endpoints
// and inDelete.
type sandbox struct {
	// id identifies the sandbox: a random ID generated in NewSandbox, or the
	// fixed value "ingress_sbox" for the ingress sandbox.
	id                 string
	// containerID is the ID of the container the sandbox was created for.
	containerID        string
	// config holds the per-container settings; Refresh resets it before
	// re-applying options (presumably populated by processOptions — confirm).
	config             containerConfig
	extDNS             []string
	// osSbox is the OS-level sandbox obtained from osl.NewSandbox. It may be
	// nil, e.g. when neither useDefaultSandBox nor a locally created sandbox
	// applies in NewSandbox.
	osSbox             osl.Sandbox
	// controller is the controller that owns this sandbox.
	controller         *controller
	// resolver is stopped when the sandbox is deleted, if set.
	resolver           Resolver
	resolverOnce       sync.Once
	refCnt             int
	// endpoints is the heap of endpoints attached to the sandbox.
	endpoints          epHeap
	// epPriority maps a string key to a priority
	// (presumably keyed by endpoint ID — TODO confirm).
	epPriority         map[string]int
	populatedEndpoints map[string]struct{}
	joinLeaveDone      chan struct{}
	dbIndex            uint64
	dbExists           bool
	// isStub marks a placeholder sandbox; NewSandbox reuses such an entry and
	// Sandboxes() hides it.
	isStub             bool
	// inDelete is set while delete is running so that a concurrent delete is
	// rejected.
	inDelete           bool
	// ingress marks the ingress sandbox, of which the controller tracks at
	// most one.
	ingress            bool
	sync.Mutex
}

主要字段含义如下：

id: 随机生成的sandbox号；
containerID: 容器ID；
osSbox: 真正的net namespace；
controller: sandbox所属的controller；
resolver: 容器的DNS域名服务器(因为对swarm不熟悉，所以还不知道为什么不直接设置一个公共的域名服务器)；
endpoints: 该sandbox中绑定的endpoint，endpoints的类型是epHeap，但实际上是[]*endpoint；
epPriority: endpoints的优先级；

sandbox::ID()

ID()可以返回sandbox的id。

// ID returns the sandbox's id.
func (sb *sandbox) ID() string {
	return sb.id
}

sandbox::ContainerID()

ContainerID()可以返回sandbox的containerID。

// ContainerID returns the ID of the container this sandbox belongs to.
func (sb *sandbox) ContainerID() string {
	return sb.containerID
}

sandbox::Key()

Key()可以返回该sandbox的存储key。

// Key returns the sandbox key produced by osl.GenerateKey. Sandboxes that use
// the default sandbox all share the key generated from "default"; every other
// sandbox derives its key from its own id.
func (sb *sandbox) Key() string {
	name := sb.id
	if sb.config.useDefaultSandBox {
		name = "default"
	}
	return osl.GenerateKey(name)
}

sandbox::Delete()

Delete()可以删除本sandbox。Delete()调用了delete()，delete()的流程如下：

标识inDelete，表示该sandbox在删除中；
释放绑定的endpoints；
停止resolver；
移除osSbox，即真正的net namespace；
从store中删除；
从controller中删除该sandbox。

其中步骤2通过调用endpoint的Leave()和Delete()完成。

// Delete removes the sandbox without forcing; it is equivalent to
// sb.delete(false).
func (sb *sandbox) Delete() error {
	return sb.delete(false)
}
// delete tears the sandbox down: it detaches and deletes the connected
// endpoints, stops the resolver, destroys the OS sandbox (unless it is the
// shared default one), removes the sandbox from the store and finally from
// the controller.
//
// When force is false, endpoints in the gateway network are skipped and each
// remaining endpoint is left via Leave before being deleted. When force is
// true, gateway endpoints are processed too and Leave is skipped.
//
// It returns a Forbidden error if another delete is already in progress, and
// an error (after clearing inDelete again) if, under distributed control, the
// network of some endpoint could not be read from the store. Failures of
// Leave, endpoint Delete and storeDelete are only logged.
func (sb *sandbox) delete(force bool) error {
	sb.Lock()
	if sb.inDelete {
		sb.Unlock()
		return types.ForbiddenErrorf("another sandbox delete in progress")
	}
	// Set the inDelete flag. This will ensure that we don't
	// update the store until we have completed all the endpoint
	// leaves and deletes. And when endpoint leaves and deletes
	// are completed then we can finally delete the sandbox object
	// altogether from the data store. If the daemon exits
	// ungracefully in the middle of a sandbox delete this way we
	// will have all the references to the endpoints in the
	// sandbox so that we can clean them up when we restart
	sb.inDelete = true
	sb.Unlock()
	// The owning controller is needed for store lookups and final removal.
	c := sb.controller
	// Detach from all endpoints
	// retain records that some endpoint could not be cleaned up, in which
	// case the sandbox must be kept.
	retain := false
	for _, ep := range sb.getConnectedEndpoints() {
		// gw network endpoint detach and removal are automatic
		if ep.endpointInGWNetwork() && !force {
			continue
		}
		// Retain the sandbox if we can't obtain the network from store.
		if _, err := c.getNetworkFromStore(ep.getNetwork().ID()); err != nil {
			if c.isDistributedControl() {
				retain = true
			}
			log.Warnf("Failed getting network for ep %s during sandbox %s delete: %v", ep.ID(), sb.ID(), err)
			continue
		}
		if !force {
			if err := ep.Leave(sb); err != nil {
				log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
			}
		}
		if err := ep.Delete(force); err != nil {
			log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
		}
	}
	if retain {
		// Allow a later delete attempt by clearing the in-progress flag.
		sb.Lock()
		sb.inDelete = false
		sb.Unlock()
		return fmt.Errorf("could not cleanup all the endpoints in container %s / sandbox %s", sb.containerID, sb.id)
	}
	// Container is going away. Path cache in etchosts is most
	// likely not required any more. Drop it.
	etchosts.Drop(sb.config.hostsPath)
	// Stop the resolver, if one is set.
	if sb.resolver != nil {
		sb.resolver.Stop()
	}
	// Destroy the OS sandbox, except for the default one, which is shared.
	if sb.osSbox != nil && !sb.config.useDefaultSandBox {
		sb.osSbox.Destroy()
	}
	// Remove the sandbox from the store; a failure is logged only.
	if err := sb.storeDelete(); err != nil {
		log.Warnf("Failed to delete sandbox %s from store: %v", sb.ID(), err)
	}
	c.Lock()
	if sb.ingress {
		c.ingressSandbox = nil
	}
	// Unregister the sandbox from the controller.
	delete(c.sandboxes, sb.ID())
	c.Unlock()
	return nil
}

sandbox::Rename()

Rename()可以更改绑定的endpoint的name，但现在还不知道为什么要更改名字。

// Rename renames every connected endpoint, except those in the gateway
// network, to name. If renaming an endpoint fails, the endpoints renamed so
// far are restored to their previous names, most recently renamed first, and
// the error is returned. Errors from the restoring renames are ignored.
func (sb *sandbox) Rename(name string) error {
	type renamedEndpoint struct {
		ep       *endpoint
		previous string
	}
	var done []renamedEndpoint

	for _, ep := range sb.getConnectedEndpoints() {
		if ep.endpointInGWNetwork() {
			continue
		}
		previous := ep.Name()
		if err := ep.rename(name); err != nil {
			// Roll back in reverse order of the successful renames.
			for i := len(done) - 1; i >= 0; i-- {
				done[i].ep.rename(done[i].previous)
			}
			return err
		}
		done = append(done, renamedEndpoint{ep: ep, previous: previous})
	}
	return nil
}

sandbox::Refresh()

Refresh()先解绑所有的endpoint，重新配置后再绑定之前的endpoint。

// Refresh re-applies options to the sandbox. It leaves all currently
// connected endpoints, resets the configuration and processes options again,
// sets up the resolution files, and then joins the same endpoints once more.
// Leave and Join failures are only logged; an error from
// setupResolutionFiles is returned and the endpoints are not re-joined.
func (sb *sandbox) Refresh(options ...SandboxOption) error {
	// Remember which endpoints are connected before detaching from them.
	connected := sb.getConnectedEndpoints()

	for _, ep := range connected {
		err := ep.Leave(sb)
		if err == nil {
			continue
		}
		log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
	}

	// Start from an empty configuration and apply the new options.
	sb.config = containerConfig{}
	sb.processOptions(options...)

	// Set up the discovery files for the new configuration.
	err := sb.setupResolutionFiles()
	if err != nil {
		return err
	}

	// Attach to the previously connected endpoints again.
	for _, ep := range connected {
		if joinErr := ep.Join(sb); joinErr != nil {
			log.Warnf("Failed attach sandbox %s to endpoint %s: %v\n", sb.ID(), ep.ID(), joinErr)
		}
	}
	return nil
}

sandbox::Endpoints()

Endpoints()可以返回sandbox中所有的endpoint，返回的类型是Endpoint interface。

// Endpoints returns the endpoints attached to the sandbox as a new slice of
// the Endpoint interface type. The sandbox lock is held while copying.
func (sb *sandbox) Endpoints() []Endpoint {
	sb.Lock()
	defer sb.Unlock()

	out := make([]Endpoint, 0, len(sb.endpoints))
	for _, ep := range sb.endpoints {
		out = append(out, ep)
	}
	return out
}

sandbox::getConnectedEndpoints()

和Endpoints()一样，只是返回的类型为[]*endpoint（具体类型），而不是Endpoint接口的切片。

// getConnectedEndpoints returns the endpoints attached to the sandbox as a
// new slice of concrete *endpoint values, in the order they are currently
// stored. The sandbox lock is held while copying.
func (sb *sandbox) getConnectedEndpoints() []*endpoint {
	sb.Lock()
	defer sb.Unlock()

	snapshot := make([]*endpoint, len(sb.endpoints))
	for idx := range sb.endpoints {
		snapshot[idx] = sb.endpoints[idx]
	}
	return snapshot
}

sandbox::removeEndpoint()

removeEndpoint()从sandbox中移除endpoint。

// removeEndpoint removes ep from the sandbox's endpoint heap. Only the first
// matching entry is removed; nothing happens when ep is not present. The
// sandbox lock is held for the whole operation.
func (sb *sandbox) removeEndpoint(ep *endpoint) {
	sb.Lock()
	defer sb.Unlock()

	for idx := range sb.endpoints {
		if sb.endpoints[idx] != ep {
			continue
		}
		// heap.Remove keeps the remaining entries in heap order.
		heap.Remove(&sb.endpoints, idx)
		return
	}
}

sandbox::execFunc()

execFunc()可以在sandbox对应的net namespace中执行传入的函数f。

1
2
3

// execFunc runs f through the sandbox's OS-level sandbox by calling
// osSbox.InvokeFunc and returns its result.
//
// osSbox may be nil (NewSandbox and delete both check for that), so an error
// is returned in that case instead of invoking a method on a nil interface,
// which would panic.
func (sb *sandbox) execFunc(f func()) error {
	if sb.osSbox == nil {
		return fmt.Errorf("osl sandbox unavailable in execFunc for container %s", sb.containerID)
	}
	return sb.osSbox.InvokeFunc(f)
}

其他

ResolveName(): 通过name获取ip；
ResolveIP(): 通过ip获取name；
SetKey(): 更新sandbox key，当使用外来的net namespace时使用，将在controller的监听unix socket中详细分析。