bind绑定原理(inet协议族的集中bind函数实现原理详解)

1、套接字的绑定

创建完套接字后,服务器端会在应用层调用bind函数进行套接字的绑定,这时会产生系统调用,由内核函数sys_bind完成套接字的绑定。

系统调用函数的具体实现

  1. SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
  2. {
  3. struct socket *sock;
  4. struct sockaddr_storage address;
  5. int err, fput_needed;
  6. sock = sockfd_lookup_light(fd, &err, &fput_needed);
  7. if (sock) {
  8. err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
  9. if (err >= 0) {
  10. err = security_socket_bind(sock,
  11. (struct sockaddr *)&address,
  12. addrlen);
  13. if (!err)
  14. err = sock->ops->bind(sock,
  15. (struct sockaddr *)
  16. &address, addrlen);
  17. }
  18. fput_light(sock->file, fput_needed);
  19. }
  20. return err;
  21. }

首先调用sockfd_lookup_light()函数,通过文件描述符查找对应的套接字sock。

更多linux内核视频教程文本资料免费获取后台私信【内核】。

bind绑定原理(inet协议族的集中bind函数实现原理详解)(1)

  1. static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
  2. {
  3. struct file *file;
  4. struct socket *sock;
  5. *err = -EBADF;
  6. file = fget_light(fd, fput_needed);
  7. if (file) {
  8. sock = sock_from_file(file, err);
  9. if (sock)
  10. return sock;
  11. fput_light(file, *fput_needed);
  12. }
  13. return NULL;
  14. }

上面的函数先调用fget_light,通过文件描述符取得对应的文件结构,然后调用sock_from_file返回该文件对应的套接字结构体地址,该地址存储在file->private_data属性中。

再回到sys_bind函数,在返回了对应的套接字结构之后,调用move_addr_to_kernel将用户地址空间中的socket地址(sockaddr)拷贝到内核空间。

然后通过sock->ops->bind调用INET协议族操作集中的bind函数,即inet_bind函数,将socket地址(内核空间)和socket绑定。

  1. int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
  2. {
  3. struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
  4. struct sock *sk = sock->sk;
  5. struct inet_sock *inet = inet_sk(sk);
  6. unsigned short snum;
  7. int chk_addr_ret;
  8. int err;
  9. //RAW类型套接字若有自己的bind函数,则使用之
  10. if (sk->sk_prot->bind) {
  11. err = sk->sk_prot->bind(sk, uaddr, addr_len);
  12. goto out;
  13. }
  14. err = -EINVAL;
  15. .....................
  16. //地址合法性检查
  17. chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
  18. /* Not specified by any standard per-se, however it breaks too
  19. * many applications when removed. It is unfortunate since
  20. * allowing applications to make a non-local bind solves
  21. * several problems with systems using dynamic addressing.
  22. * (ie. your servers still start up even if your ISDN link
  23. * is temporarily down)
  24. */
  25. err = -EADDRNOTAVAIL;
  26. if (!sysctl_ip_nonlocal_bind &&
  27. !(inet->freebind || inet->transparent) &&
  28. addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
  29. chk_addr_ret != RTN_LOCAL &&
  30. chk_addr_ret != RTN_MULTICAST &&
  31. chk_addr_ret != RTN_BROADCAST)
  32. goto out;
  33. snum = ntohs(addr->sin_port);
  34. err = -EACCES;
  35. if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
  36. goto out;
  37. /* We keep a pair of addresses. rcv_saddr is the one
  38. * used by hash lookups, and saddr is used for transmit.
  39. *
  40. * In the BSD API these are the same except where it
  41. * would be illegal to use them (multicast/broadcast) in
  42. * which case the sending device address is used.
  43. */
  44. lock_sock(sk);
  45. /* Check these errors (active socket, double bind). */
  46. err = -EINVAL;
  47. if (sk->sk_state != TCP_CLOSE || inet->inet_num)//如果sk的状态不是CLOSE或者本地端口已经被绑定
  48. goto out_release_sock;
  49. inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;//设置源地址
  50. if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
  51. inet->inet_saddr = 0; /* Use device */
  52. /* Make sure we are allowed to bind here. */
  53. if (sk->sk_prot->get_port(sk, snum)) {
  54. inet->inet_saddr = inet->inet_rcv_saddr = 0;
  55. err = -EADDRINUSE;
  56. goto out_release_sock;
  57. }
  58. if (inet->inet_rcv_saddr)
  59. sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
  60. if (snum)
  61. sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
  62. inet->inet_sport = htons(inet->inet_num);//设置源端口号,表明该端口已经被占用
  63. inet->inet_daddr = 0;
  64. inet->inet_dport = 0;
  65. sk_dst_reset(sk);
  66. err = 0;
  67. out_release_sock:
  68. release_sock(sk);
  69. out:
  70. return err;
  71. }

这样套接字绑定结束。

2、套接字的监听
  1. SYSCALL_DEFINE2(listen, int, fd, int, backlog)
  2. {
  3. struct socket *sock;
  4. int err, fput_needed;
  5. int somaxconn;
  6. sock = sockfd_lookup_light(fd, &err, &fput_needed);
  7. if (sock) {
  8. ......................
  9. err = security_socket_listen(sock, backlog);
  10. if (!err)
  11. err = sock->ops->listen(sock, backlog);
  12. fput_light(sock->file, fput_needed);
  13. }
  14. return err;
  15. }

该函数先通过文件描述符查找到对应的套接字结构,然后通过sock->ops->listen调用inet_listen函数,将套接字sk的状态设置为TCP_LISTEN。

  1. int inet_listen(struct socket *sock, int backlog)
  2. {
  3. struct sock *sk = sock->sk;
  4. unsigned char old_state;
  5. int err;
  6. lock_sock(sk);
  7. err = -EINVAL;
  8. if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
  9. goto out;
  10. old_state = sk->sk_state;
  11. if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
  12. goto out;
  13. if (old_state != TCP_LISTEN) {
  14. err = inet_csk_listen_start(sk, backlog);//该函数将sk的状态设置为TCP_LISTEN
  15. if (err)
  16. goto out;
  17. }
  18. sk->sk_max_ack_backlog = backlog;
  19. err = 0;
  20. out:
  21. release_sock(sk);
  22. return err;
  23. }

3、套接字的连接和接受连接

3.1、申请连接
  1. SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
  2. int, addrlen)
  3. {
  4. struct socket *sock;
  5. struct sockaddr_storage address;
  6. int err, fput_needed;
  7. sock = sockfd_lookup_light(fd, &err, &fput_needed);
  8. if (!sock)
  9. goto out;
  10. err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
  11. if (err < 0)
  12. goto out_put;
  13. err =
  14. security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
  15. if (err)
  16. goto out_put;
  17. err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
  18. sock->file->f_flags);
  19. out_put:
  20. fput_light(sock->file, fput_needed);
  21. out:
  22. return err;
  23. }

同样先调用sockfd_lookup_light函数获得socket指针,再将用户空间的地址拷贝到内核空间,最后通过sock->ops->connect调用inet_stream_connect函数。

  1. int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
  2. int addr_len, int flags)
  3. {
  4. struct sock *sk = sock->sk;
  5. int err;
  6. long timeo;
  7. if (addr_len < sizeof(uaddr->sa_family))
  8. return -EINVAL;
  9. lock_sock(sk);
  10. ......................
  11. switch (sock->state) {
  12. default:
  13. err = -EINVAL;
  14. goto out;
  15. case SS_CONNECTED:
  16. err = -EISCONN;
  17. goto out;
  18. case SS_CONNECTING:
  19. err = -EALREADY;
  20. /* Fall out of switch with err, set for this state */
  21. break;
  22. case SS_UNCONNECTED:
  23. err = -EISCONN;
  24. if (sk->sk_state != TCP_CLOSE)
  25. goto out;
  26. err = sk->sk_prot->connect(sk, uaddr, addr_len);
  27. if (err < 0)
  28. goto out;
  29. sock->state = SS_CONNECTING;
  30. err = -EINPROGRESS;
  31. break;
  32. }
  33. timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
  34. if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
  35. /* Error code is set above */
  36. if (!timeo || !inet_wait_for_connect(sk, timeo))
  37. goto out;
  38. err = sock_intr_errno(timeo);
  39. if (signal_pending(current))
  40. goto out;
  41. }
  42. /* Connection was closed by RST, timeout, ICMP error
  43. * or another process disconnected us.
  44. */
  45. if (sk->sk_state == TCP_CLOSE)
  46. goto sock_error;
  47. sock->state = SS_CONNECTED;
  48. err = 0;
  49. out:
  50. release_sock(sk);
  51. return err;
  52. sock_error:
  53. err = sock_error(sk) ? : -ECONNABORTED;
  54. sock->state = SS_UNCONNECTED;
  55. if (sk->sk_prot->disconnect(sk, flags))
  56. sock->state = SS_DISCONNECTING;
  57. goto out;
  58. }

inet_stream_connect通过sk->sk_prot->connect调用tcp_v4_connect函数,调用成功后将sock的状态置为SS_CONNECTING。

  1. int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
  2. {
  3. struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
  4. struct inet_sock *inet = inet_sk(sk);
  5. struct tcp_sock *tp = tcp_sk(sk);
  6. __be16 orig_sport, orig_dport;
  7. __be32 daddr, nexthop;
  8. struct flowi4 *fl4;
  9. struct rtable *rt;
  10. int err;
  11. struct ip_options_rcu *inet_opt;
  12. //合法性检查
  13. if (addr_len < sizeof(struct sockaddr_in))
  14. return -EINVAL;
  15. if (usin->sin_family != AF_INET)
  16. return -EAFNOSUPPORT;
  17. //记录下一跳地址和目的地址
  18. nexthop = daddr = usin->sin_addr.s_addr;
  19. inet_opt = rcu_dereference_protected(inet->inet_opt,
  20. sock_owned_by_user(sk));
  21. if (inet_opt && inet_opt->opt.srr) {
  22. if (!daddr)
  23. return -EINVAL;
  24. nexthop = inet_opt->opt.faddr;
  25. }
  26. //本地端口和目的地端口
  27. orig_sport = inet->inet_sport;
  28. orig_dport = usin->sin_port;
  29. fl4 = &inet->cork.fl.u.ip4;
  30. rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
  31. RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
  32. IPPROTO_TCP,
  33. orig_sport, orig_dport, sk, true);//维护路由表
  34. if (IS_ERR(rt)) {
  35. err = PTR_ERR(rt);
  36. if (err == -ENETUNREACH)
  37. IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
  38. return err;
  39. }
  40. //处理多播或广播
  41. if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
  42. ip_rt_put(rt);
  43. return -ENETUNREACH;
  44. }
  45. if (!inet_opt || !inet_opt->opt.srr)
  46. daddr = fl4->daddr;
  47. if (!inet->inet_saddr)
  48. inet->inet_saddr = fl4->saddr;
  49. inet->inet_rcv_saddr = inet->inet_saddr;
  50. if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
  51. /* Reset inherited state */
  52. tp->rx_opt.ts_recent = 0;
  53. tp->rx_opt.ts_recent_stamp = 0;
  54. tp->write_seq = 0;
  55. }
  56. if (tcp_death_row.sysctl_tw_recycle &&
  57. !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
  58. struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
  59. /*
  60. * VJ's idea. We save last timestamp seen from
  61. * the destination in peer table, when entering state
  62. * TIME-WAIT * and initialize rx_opt.ts_recent from it,
  63. * when trying new connection.
  64. */
  65. if (peer) {
  66. inet_peer_refcheck(peer);
  67. if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
  68. tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
  69. tp->rx_opt.ts_recent = peer->tcp_ts;
  70. }
  71. }
  72. }
  73. //设置套接字中的目的端口和目的地址
  74. inet->inet_dport = usin->sin_port;
  75. inet->inet_daddr = daddr;
  76. inet_csk(sk)->icsk_ext_hdr_len = 0;
  77. if (inet_opt)
  78. inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
  79. tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
  80. //设置sk的状态为TCP_SYN_SENT
  81. tcp_set_state(sk, TCP_SYN_SENT);
  82. err = inet_hash_connect(&tcp_death_row, sk);
  83. if (err)
  84. goto failure;
  85. rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
  86. inet->inet_sport, inet->inet_dport, sk);
  87. if (IS_ERR(rt)) {
  88. err = PTR_ERR(rt);
  89. rt = NULL;
  90. goto failure;
  91. }
  92. /* OK, now commit destination to socket. */
  93. sk->sk_gso_type = SKB_GSO_TCPV4;
  94. sk_setup_caps(sk, &rt->dst);
  95. if (!tp->write_seq)
  96. tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
  97. inet->inet_daddr,
  98. inet->inet_sport,
  99. usin->sin_port);
  100. inet->inet_id = tp->write_seq ^ jiffies;
  101. err = tcp_connect(sk);//创建SYN报文并发送,该函数实现过程挺复杂,需进行TCP连接初始化以及发送
  102. rt = NULL;
  103. if (err)
  104. goto failure;
  105. return 0;
  106. failure:
  107. //失败处理
  108. tcp_set_state(sk, TCP_CLOSE);
  109. ip_rt_put(rt);
  110. sk->sk_route_caps = 0;
  111. inet->inet_dport = 0;
  112. return err;
  113. }

3.2、接受连接

系统调用函数sys_accept实现如下:

  1. SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
  2. int __user *, upeer_addrlen)
  3. {
  4. return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
  5. }

调用系统调用sys_accept4

  1. SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
  2. int __user *, upeer_addrlen, int, flags)
  3. {
  4. struct socket *sock, *newsock;
  5. struct file *newfile;
  6. int err, len, newfd, fput_needed;
  7. struct sockaddr_storage address;
  8. .......................
  9. sock = sockfd_lookup_light(fd, &err, &fput_needed);//根据fd获得一个socket
  10. if (!sock)
  11. goto out;
  12. err = -ENFILE;
  13. newsock = sock_alloc();//重新创建一个新的socket
  14. if (!newsock)
  15. goto out_put;
  16. //复制套接字部分属性
  17. newsock->type = sock->type;
  18. newsock->ops = sock->ops;
  19. __module_get(newsock->ops->owner);
  20. //给新建的socket分配文件结构,并返回新的文件描述符
  21. newfd = sock_alloc_file(newsock, &newfile, flags);
  22. if (unlikely(newfd < 0)) {
  23. err = newfd;
  24. sock_release(newsock);
  25. goto out_put;
  26. }
  27. err = security_socket_accept(sock, newsock);
  28. if (err)
  29. goto out_fd;
  30. //调用inet_accept接受连接
  31. err = sock->ops->accept(sock, newsock, sock->file->f_flags);
  32. if (err < 0)
  33. goto out_fd;
  34. if (upeer_sockaddr) {//将地址信息从内核移到用户空间
  35. if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
  36. &len, 2) < 0) {
  37. err = -ECONNABORTED;
  38. goto out_fd;
  39. }
  40. err = move_addr_to_user((struct sockaddr *)&address,
  41. len, upeer_sockaddr, upeer_addrlen);
  42. if (err < 0)
  43. goto out_fd;
  44. }
  45. /* File flags are not inherited via accept() unlike another OSes. */
  46. //安装文件描述符
  47. fd_install(newfd, newfile);
  48. err = newfd;
  49. out_put:
  50. fput_light(sock->file, fput_needed);
  51. out:
  52. return err;
  53. out_fd:
  54. fput(newfile);
  55. put_unused_fd(newfd);
  56. goto out_put;
  57. }

该函数创建一个新的套接字,设置客户端连接并唤醒客户端并返回一个新的文件描述符fd。

下面是inet_accept函数的实现

  1. int inet_accept(struct socket *sock, struct socket *newsock, int flags)
  2. {
  3. struct sock *sk1 = sock->sk;
  4. int err = -EINVAL;
  5. struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);//调用inet_csk_accept函数从队列icsk_accept_queue取出已经连接的套接字
  6. if (!sk2)
  7. goto do_err;
  8. lock_sock(sk2);
  9. sock_rps_record_flow(sk2);
  10. WARN_ON(!((1 << sk2->sk_state) &
  11. (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
  12. sock_graft(sk2, newsock);
  13. newsock->state = SS_CONNECTED;//设置套接字状态
  14. err = 0;
  15. release_sock(sk2);
  16. do_err:
  17. return err;
  18. }
4、关闭连接

关闭一个socket连接,系统调用sys_shutdown

  1. SYSCALL_DEFINE2(shutdown, int, fd, int, how)
  2. {
  3. int err, fput_needed;
  4. struct socket *sock;
  5. sock = sockfd_lookup_light(fd, &err, &fput_needed);
  6. if (sock != NULL) {
  7. err = security_socket_shutdown(sock, how);
  8. if (!err)
  9. err = sock->ops->shutdown(sock, how);
  10. fput_light(sock->file, fput_needed);
  11. }
  12. return err;
  13. }

函数最后调用inet_shutdown关闭套接字

  1. int inet_shutdown(struct socket *sock, int how)
  2. {
  3. struct sock *sk = sock->sk;
  4. int err = 0;
  5. .................
  6. lock_sock(sk);
  7. if (sock->state == SS_CONNECTING) {
  8. if ((1 << sk->sk_state) &
  9. (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
  10. sock->state = SS_DISCONNECTING;
  11. else
  12. sock->state = SS_CONNECTED;
  13. }
  14. switch (sk->sk_state) {
  15. case TCP_CLOSE:
  16. err = -ENOTCONN;
  17. default:
  18. sk->sk_shutdown |= how;
  19. if (sk->sk_prot->shutdown)
  20. sk->sk_prot->shutdown(sk, how);//调用tcp_shutdown强制关闭连接
  21. break;
  22. /* Remaining two branches are temporary solution for missing
  23. * close() in multithreaded environment. It is _not_ a good idea,
  24. * but we have no choice until close() is repaired at VFS level.
  25. */
  26. case TCP_LISTEN:
  27. if (!(how & RCV_SHUTDOWN))
  28. break;
  29. /* Fall through */
  30. case TCP_SYN_SENT:
  31. err = sk->sk_prot->disconnect(sk, O_NONBLOCK);//调用tcp_disconnect断开连接
  32. sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;//设置套接字状态
  33. break;
  34. }
  35. sk->sk_state_change(sk);
  36. release_sock(sk);
  37. return err;
  38. }
,

免责声明:本文仅代表文章作者的个人观点,与本站无关。其原创性、真实性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容文字的真实性、完整性和原创性本站不作任何保证或承诺,请读者仅作参考,并自行核实相关内容。文章投诉邮箱:anhduc.ph@yahoo.com

    分享
    投诉
    首页