| #include <linux/module.h> |
| #include <linux/cache.h> |
| #include <linux/tcp.h> |
| |
| #include <net/inet_connection_sock.h> |
| #include <net/request_sock.h> |
| #include <net/sock.h> |
| #include <net/dst.h> |
| #include <net/tcp.h> |
| |
| int sysctl_tcp_nometrics_save __read_mostly; /* when non-zero, tcp_update_metrics() returns without saving anything */ |
| |
| /* Save metrics learned by this TCP session. This function is called |
| * only, when TCP finishes successfully i.e. when it enters TIME-WAIT |
| * or goes from LAST-ACK to CLOSE. |
| */ |
| void tcp_update_metrics(struct sock *sk) |
| { |
| struct tcp_sock *tp = tcp_sk(sk); |
| struct dst_entry *dst = __sk_dst_get(sk); |
| |
| if (sysctl_tcp_nometrics_save) |
| return; |
| |
| if (dst && (dst->flags & DST_HOST)) { |
| const struct inet_connection_sock *icsk = inet_csk(sk); |
| int m; |
| unsigned long rtt; |
| |
| dst_confirm(dst); |
| |
| if (icsk->icsk_backoff || !tp->srtt) { |
| /* This session failed to estimate rtt. Why? |
| * Probably, no packets returned in time. |
| * Reset our results. |
| */ |
| if (!(dst_metric_locked(dst, RTAX_RTT))) |
| dst_metric_set(dst, RTAX_RTT, 0); |
| return; |
| } |
| |
| rtt = dst_metric_rtt(dst, RTAX_RTT); |
| m = rtt - tp->srtt; |
| |
| /* If newly calculated rtt larger than stored one, |
| * store new one. Otherwise, use EWMA. Remember, |
| * rtt overestimation is always better than underestimation. |
| */ |
| if (!(dst_metric_locked(dst, RTAX_RTT))) { |
| if (m <= 0) |
| set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); |
| else |
| set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); |
| } |
| |
| if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { |
| unsigned long var; |
| if (m < 0) |
| m = -m; |
| |
| /* Scale deviation to rttvar fixed point */ |
| m >>= 1; |
| if (m < tp->mdev) |
| m = tp->mdev; |
| |
| var = dst_metric_rtt(dst, RTAX_RTTVAR); |
| if (m >= var) |
| var = m; |
| else |
| var -= (var - m) >> 2; |
| |
| set_dst_metric_rtt(dst, RTAX_RTTVAR, var); |
| } |
| |
| if (tcp_in_initial_slowstart(tp)) { |
| /* Slow start still did not finish. */ |
| if (dst_metric(dst, RTAX_SSTHRESH) && |
| !dst_metric_locked(dst, RTAX_SSTHRESH) && |
| (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) |
| dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); |
| if (!dst_metric_locked(dst, RTAX_CWND) && |
| tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) |
| dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); |
| } else if (tp->snd_cwnd > tp->snd_ssthresh && |
| icsk->icsk_ca_state == TCP_CA_Open) { |
| /* Cong. avoidance phase, cwnd is reliable. */ |
| if (!dst_metric_locked(dst, RTAX_SSTHRESH)) |
| dst_metric_set(dst, RTAX_SSTHRESH, |
| max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); |
| if (!dst_metric_locked(dst, RTAX_CWND)) |
| dst_metric_set(dst, RTAX_CWND, |
| (dst_metric(dst, RTAX_CWND) + |
| tp->snd_cwnd) >> 1); |
| } else { |
| /* Else slow start did not finish, cwnd is non-sense, |
| ssthresh may be also invalid. |
| */ |
| if (!dst_metric_locked(dst, RTAX_CWND)) |
| dst_metric_set(dst, RTAX_CWND, |
| (dst_metric(dst, RTAX_CWND) + |
| tp->snd_ssthresh) >> 1); |
| if (dst_metric(dst, RTAX_SSTHRESH) && |
| !dst_metric_locked(dst, RTAX_SSTHRESH) && |
| tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) |
| dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); |
| } |
| |
| if (!dst_metric_locked(dst, RTAX_REORDERING)) { |
| if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && |
| tp->reordering != sysctl_tcp_reordering) |
| dst_metric_set(dst, RTAX_REORDERING, tp->reordering); |
| } |
| } |
| } |
| |
| /* Initialize metrics on socket. */ |
| |
| void tcp_init_metrics(struct sock *sk) |
| { |
| struct tcp_sock *tp = tcp_sk(sk); |
| struct dst_entry *dst = __sk_dst_get(sk); |
| |
| if (dst == NULL) |
| goto reset; |
| |
| dst_confirm(dst); |
| |
| if (dst_metric_locked(dst, RTAX_CWND)) |
| tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); |
| if (dst_metric(dst, RTAX_SSTHRESH)) { |
| tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); |
| if (tp->snd_ssthresh > tp->snd_cwnd_clamp) |
| tp->snd_ssthresh = tp->snd_cwnd_clamp; |
| } else { |
| /* ssthresh may have been reduced unnecessarily during. |
| * 3WHS. Restore it back to its initial default. |
| */ |
| tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
| } |
| if (dst_metric(dst, RTAX_REORDERING) && |
| tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
| tcp_disable_fack(tp); |
| tcp_disable_early_retrans(tp); |
| tp->reordering = dst_metric(dst, RTAX_REORDERING); |
| } |
| |
| if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) |
| goto reset; |
| |
| /* Initial rtt is determined from SYN,SYN-ACK. |
| * The segment is small and rtt may appear much |
| * less than real one. Use per-dst memory |
| * to make it more realistic. |
| * |
| * A bit of theory. RTT is time passed after "normal" sized packet |
| * is sent until it is ACKed. In normal circumstances sending small |
| * packets force peer to delay ACKs and calculation is correct too. |
| * The algorithm is adaptive and, provided we follow specs, it |
| * NEVER underestimate RTT. BUT! If peer tries to make some clever |
| * tricks sort of "quick acks" for time long enough to decrease RTT |
| * to low value, and then abruptly stops to do it and starts to delay |
| * ACKs, wait for troubles. |
| */ |
| if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { |
| tp->srtt = dst_metric_rtt(dst, RTAX_RTT); |
| tp->rtt_seq = tp->snd_nxt; |
| } |
| if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { |
| tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); |
| tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); |
| } |
| tcp_set_rto(sk); |
| reset: |
| if (tp->srtt == 0) { |
| /* RFC6298: 5.7 We've failed to get a valid RTT sample from |
| * 3WHS. This is most likely due to retransmission, |
| * including spurious one. Reset the RTO back to 3secs |
| * from the more aggressive 1sec to avoid more spurious |
| * retransmission. |
| */ |
| tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; |
| inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
| } |
| /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
| * retransmitted. In light of RFC6298 more aggressive 1sec |
| * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK |
| * retransmission has occurred. |
| */ |
| if (tp->total_retrans > 1) |
| tp->snd_cwnd = 1; |
| else |
| tp->snd_cwnd = tcp_init_cwnd(tp, dst); |
| tp->snd_cwnd_stamp = tcp_time_stamp; |
| } |
| |
| bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) |
| { |
| if (!dst) |
| return false; |
| return dst_metric(dst, RTAX_RTT) ? true : false; |
| } |
| EXPORT_SYMBOL_GPL(tcp_peer_is_proven); |