/* 如何實現自己的 Linux container? (How to implement your own Linux container) */







#define _GNU_SOURCE

#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/mount.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <libcgroup.h>
define STACK_SIZE (1024 * 1024)
define MEMORY_LIMIT (51210241024)
const char rootfs = "/data1/centos6/rootfs/"; //centos6 鏡像位置 const char hostname = "mydocker"; //container 主機名 static char child_stack[STACK_SIZE]; char const child_args[] = { "/bin/bash", NULL }; int pipe_fd【2】; //父子進程同步 int child_main(void args) { char c; printf("In child process(container)\n"); chroot(rootfs); //用chroot 切換根目錄 if(errno != 0){ perror("chroot()"); exit(1); } //clone 調用中的 CLONE_NEWUTS起隔離主機名和域名的作用 sethostname(hostname, sizeof(hostname)); if( errno != 0 ){ perror("sethostname()!"); exit(1); } //掛載proc子系統,CLONE_NEWNS 起隔離文件系統作用 mount("proc", "/proc", "proc", 0, NULL); if (errno != 0){ perror("Mount(proc)"); exit(1); } //切換的根目錄 chdir("/"); close(pipe_fd【1】); read(pipe_fd【0】, &c, 1); //設置veth1 網絡 system("ip link set lo up"); system("ip link set veth1 up"); system("ip addr add 169.254.1.2/30 dev veth1"); //將子進程的鏡像替換成bash execv(child_args[0], child_args); return 1; } struct cgroup cgroup_control(pid_t pid){ struct cgroup cgroup = NULL; int ret; ret = cgroup_init(); char cgname = malloc(19sizeof(char)); if (ret) { printf("error occurs while init cgroup.\n"); return NULL; } time_t nowtime = time(NULL); sprintf(cgname, "mydocker%d", (int)now_time); printf("%s\n", cgname); cgroup = cgroup_new_cgroup(cgname); if( !cgroup ){ ret = ECGFAIL; printf("Error new cgroup%s\n", cgroup_strerror(ret)); goto out; } //添加cgroup memory 和 cpuset子系統 struct cgroup_controller cgc = cgroup_add_controller(cgroup, "memory"); struct cgroup_controller cgc_cpuset = cgroup_add_controller(cgroup, "cpuset"); if ( !cgc || !cgc_cpuset ){ ret = ECGINVAL; printf("Error add controller %s\n", cgroup_strerror(ret)); goto out; } // 內存限制 512M if( cgroup_add_value_uint64(cgc, "memory.limit_in_bytes", MEMORY_LIMIT) ){ printf("Error limit memory.\n"); goto out; } //限制只能使用0和1號cpu if ( cgroup_add_value_string(cgc_cpuset, "cpuset.cpus", "0-1") ){ printf("Error limit cpuset cpus.\n"); goto out; } //限制只能使用0和1塊內存 if ( cgroup_add_value_string(cgc_cpuset, "cpuset.mems", "0-1") ){ printf("Error limit cpuset 
mems.\n"); goto out; } ret = cgroup_create_cgroup(cgroup, 0); if (ret){ printf("Error create cgroup%s\n", cgroup_strerror(ret)); goto out; } ret = cgroup_attach_task_pid(cgroup, pid); if (ret){ printf("Error attach_task_pid %s\n", cgroup_strerror(ret)); goto out; } return cgroup; out: if (cgroup){ cgroup_delete_cgroup(cgroup, 0); cgroup_free(&cgroup); } return NULL; } int main() { char cmd; printf("main process: \n"); pipe(pipe_fd); if( errno != 0){
perror("pipe()"); exit(1); } int child_pid = clone(child_main, child_stack + STACK_SIZE, \ CLONE_NEWNET | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD, NULL); struct cgroup cg = cgroup_control(child_pid); //添加veth pair ,設置veth1 namespace 為子進程的,veth0 在父進程的namespace //linl3 實現起來太繁瑣,借用命令行工具ip 實現 system("ip link add veth0 type veth peer name veth1"); asprintf(&cmd, "ip link set veth1 netns %d", child_pid); system(cmd); system("ip link set veth0 up"); system("ip addr add 169.254.1.1/30 dev veth0"); free(cmd); //等執行以上命令,通知子進程,子進程設置自己的網絡 close(pipe_fd【1】); waitpid(child_pid, NULL, 0); if (cg) { cgroup_delete_cgroup(cg, 0); //刪除cgroup 子系統 } printf("child process exited.\n"); return 0; }</pre>來自:http://weibo.com/p/1001603824282965777334