CODE

2017年8月16日 星期三

DHCP relay agent — giaddr



文件 RFC2131 說明了,當 dhcp relay agent 要將 client 發出的訊息 forward 給 server 時, giaddr 需要填入 relay agent 收到該訊息之介面的 IP 位址(以下稱為 IP y)。

giaddr的功能有二個:
1. DHCP server 可以根據這個值,決定要分配給 client 的 IP。
2. DHCP server 將回送 DHCP reply 到這個  IP 位址。

而在文件 RFC3527 中, 指出在有些情況下, DHCP server 是無法存取 IP y。 因此引進 Link Selection sub-option 功能,希望可以將決定分配IP的功能獨立出來;DHCP server 可以根據這個 Link Selection sub-option 決定要分配的 IP。

但,這份文件還是沒有提到要怎麽解決  DHCP server 是無法存取 IP y 的問題...

2017年8月5日 星期六

[Linux] netdevice ioctl command, SIOCDEVPRIVATE

我們創建一個 AF_INET, SOCK_DGRAM socket,並利用ioctl調用其 SIOCDEVPRIVATE function。


程式碼 1.


        /*
         * Userspace side of a bridge private ioctl: args[0] packs the
         * sub-command and its three operands, and is handed to the driver
         * through ifr.ifr_data on interface "br0".
         * NOTE(review): fe, fd, ifr and ret are declared outside this excerpt.
         */
        unsigned long args[2][4] =
        {{
                BRCTL_GET_FDB_ENTRIES,  /* args[0][0]: sub-command selector */
                (unsigned long) fe,     /* address of fe, carried as an integer */
                sizeof(fe)/sizeof(struct __fdb_entry), /* size of fe in struct __fdb_entry units */
                0                       /* fourth operand; presumably a start offset - TODO confirm */
        },
        {
            ......
            ......
            ......

        }};


        /* Any AF_INET datagram socket works as a handle for the ioctl. */
        if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
        {
                return -1;
        }

        /* The name field is zeroed first, so the short literal copied by
           strncpy() stays NUL-terminated. */
        memset(ifr.ifr_name, 0, IFNAMSIZ);
        strncpy(ifr.ifr_name, "br0", IFNAMSIZ);
        /* Only the first row (args[0]) is passed to this ioctl call. */
        ifr.ifr_data = (char *) args[0];
        ret = ioctl(fd, SIOCDEVPRIVATE, &ifr);




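因為 fd 是一個 socket,ioctl 系統呼叫會先進入 net/socket.c 的 sock_ioctl()(socket 檔案的 ioctl 進入點)。
當 cmd 落在 SIOCDEVPRIVATE ~ SIOCDEVPRIVATE + 15 的範圍時,sock_ioctl() 會直接調用 dev_ioctl();其他的 cmd 才會交給該 socket 所屬 address family 的 ioctl(AF_INET 即為 inet_ioctl()),而 inet_ioctl() 無法處理的 cmd 會再交給 sk->sk_prot->ioctl()。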


程式碼 2.


/* net/ipv4/af_inet.c */
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        struct sock *sk = sock->sk;
        int err = 0;
        struct net *net = sock_net(sk);

        switch (cmd) {
                case SIOCGSTAMP:
                        err = sock_get_timestamp(sk, (struct timeval __user *)arg);
                        break;
                case SIOCGSTAMPNS:
                        err = sock_get_timestampns(sk, (struct timespec __user *)arg);
                        break;

                ......
                ......
                ......

                case SIOCSIFFLAGS:
                        err = devinet_ioctl(net, cmd, (void __user *)arg);
                        break;
                default:
                        if (sk->sk_prot->ioctl()
                                err = sk->sk_prot->ioctl(sk, cmd, arg);
                        else
                                err = -ENOIOCTLCMD;
                        break;
        }
        return err;
}

/* net/socket.c */
/*
 * ioctl entry point for socket files (excerpt; the remaining branches are
 * elided with "......").
 *
 * Commands in the device-private range SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15
 * are handed straight to dev_ioctl() together with the owning network
 * namespace of the socket.
 */
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
        struct socket *sock;
        struct sock *sk;
        void __user *argp = (void __user *)arg;
        int pid, err;
        struct net *net;

        sock = file->private_data;
        sk = sock->sk;
        net = sock_net(sk);

        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
                /* dev_ioctl() takes a user pointer, so pass argp, not the raw integer. */
                err = dev_ioctl(net, cmd, argp);
        } else

        ......
        ......
        ......
}




sock_ioctl() -> dev_ioctl() -> dev_ifsioc() 。
比較重要值得一提的地方在 dev_ifsioc() 中, linux 會根據你的 interface name 找到對應的 netdev,並調用其 ndo_do_ioctl()。
以我們舉的例子為例,我們是以 br0 為 interface name(請回顧程式碼 1.)。所以,在這裡 __dev_get_by_name()會找到 bridge 的 netdev。
因此 ops->ndo_do_ioctl() 指的就是 br_dev_ioctl()。



程式碼 3.


/* net/core/dev.c */
/*
 * Device-level ioctl dispatcher (excerpt; the main switch header and most
 * cases are elided with ".....").
 *
 * SIOCGIFCONF and SIOCGIFNAME are served before the request is copied in.
 * For everything else the struct ifreq is copied from user space. Private
 * commands (SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15) and SIOCWANDEV are passed
 * to dev_ifsioc() under the rtnl lock, and the ifreq is copied back to the
 * caller on success.
 *
 * Returns the handler's result, -EFAULT when a user copy fails, or -EINVAL
 * for commands that no branch accepts.
 */
int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
        struct ifreq ifr;
        int ret;
        char *colon;

        /* One special case: SIOCGIFCONF takes ifconf argument
           and requires shared lock, because it sleeps writing
           to user space.
         */

        if (cmd == SIOCGIFCONF) {
                rtnl_lock();
                ret = dev_ifconf(net, (char __user *) arg);
                rtnl_unlock();
                return ret;
        }
        if (cmd == SIOCGIFNAME)
                return dev_ifname(net, (struct ifreq __user *)arg);

        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                return -EFAULT;
        .....
        .....
                case SIOCSIFLINK:
                        return -EINVAL;

                /*
                 *      Unknown or private ioctl.
                 */
                default:
                        if (cmd == SIOCWANDEV ||
                            (cmd >= SIOCDEVPRIVATE &&
                             cmd <= SIOCDEVPRIVATE + 15)) {
                                dev_load(net, ifr.ifr_name);
                                rtnl_lock();
                                ret = dev_ifsioc(net, &ifr, cmd);
                                rtnl_unlock();
                                /* Hand the (possibly updated) ifreq back only on success. */
                                if (!ret && copy_to_user(arg, &ifr,
                                                         sizeof(struct ifreq)))
                                        ret = -EFAULT;
                                return ret;
                        }
                        /* Take care of Wireless Extensions */
                        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
                                return wext_handle_ioctl(net, &ifr, cmd, arg);
                        return -EINVAL;
        }
}


/* net/core/dev.c */
/*
 * Apply an interface ioctl to the device named in ifr->ifr_name (excerpt;
 * some cases are elided with ".........").
 *
 * Returns -ENODEV when no device with that name exists in net. For the
 * driver-handled commands listed in the default branch, the result of the
 * driver's ndo_do_ioctl hook is returned; -EOPNOTSUPP when the driver has
 * no hook, -ENODEV when the device is not present, and -EINVAL for any
 * other command that reaches the default branch.
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
        int err;
        /* Resolve the target device from the interface name in the request. */
        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
        const struct net_device_ops *ops;

        if (!dev)
                return -ENODEV;

        ops = dev->netdev_ops;

        switch (cmd) {
                case SIOCSIFFLAGS:      /* Set interface flags */
                        return dev_change_flags(dev, ifr->ifr_flags);

                case SIOCSIFMETRIC:     /* Set the metric on the interface
                                           (currently unused) */
                        return -EOPNOTSUPP;
               .........
               .........
               .........

                /*
                 *      Unknown or private ioctl
                 */

                default:
                        /* Commands that are delegated to the device driver. */
                        if ((cmd >= SIOCDEVPRIVATE &&
                            cmd <= SIOCDEVPRIVATE + 15) ||
                            cmd == SIOCBONDENSLAVE ||
                            cmd == SIOCBONDRELEASE ||
                            cmd == SIOCBONDSETHWADDR ||
                            cmd == SIOCBONDSLAVEINFOQUERY ||
                            cmd == SIOCBONDINFOQUERY ||
                            cmd == SIOCBONDCHANGEACTIVE ||
                            cmd == SIOCGMIIPHY ||
                            cmd == SIOCGMIIREG ||
                            cmd == SIOCSMIIREG ||
                            cmd == SIOCBRADDIF ||
                            cmd == SIOCBRDELIF ||
                            cmd == SIOCSHWTSTAMP ||
                            cmd == SIOCWANDEV) {
                                /* Stays -EOPNOTSUPP when the driver has no ioctl hook. */
                                err = -EOPNOTSUPP;
                                if (ops->ndo_do_ioctl) {
                                        if (netif_device_present(dev))
                                                err = ops->ndo_do_ioctl(dev, ifr, cmd);
                                        else
                                                err = -ENODEV;
                                }
                        } else
                                err = -EINVAL;

        }
        return err;
}




當 cmd 為 SIOCDEVPRIVATE 時,br_dev_ioctl() 會調用 old_dev_ioctl(),而 old_dev_ioctl() 就會根據 ifr.ifr_data 所指向陣列的第一個元素 args[0] 的值決定要做什麽事。
以我們的程式碼為例,ifr.ifr_data 指向的 args[0] 為 BRCTL_GET_FDB_ENTRIES 。 (請參考程式碼 1.)

程式碼 4.


/* net/bridge/br_ioctl.c */
/*
 * ioctl handler of the bridge net device.
 *
 * SIOCDEVPRIVATE is routed to the legacy handler old_dev_ioctl();
 * SIOCBRADDIF / SIOCBRDELIF add or remove the port whose ifindex is given
 * in rq->ifr_ifindex. Every other command is logged and rejected with
 * -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
        struct net_bridge *bridge = netdev_priv(dev);

        /* Legacy multiplexed interface. */
        if (cmd == SIOCDEVPRIVATE)
                return old_dev_ioctl(dev, rq, cmd);

        /* Port add/remove; the third argument is non-zero for "add". */
        if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
                return add_del_if(bridge, rq->ifr_ifindex, cmd == SIOCBRADDIF);

        pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
        return -EOPNOTSUPP;
}


*
 * Legacy ioctl's through SIOCDEVPRIVATE
 * This interface is deprecated because it was too difficult to
 * to do the translation for 32/64bit ioctl compatability.
 */
static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
        struct net_bridge *br = netdev_priv(dev);
        unsigned long args[4];
 
        if (copy_from_user(args, rq->ifr_data, sizeof(args)))
                return -EFAULT;
 
        switch (args[0]) {
        case BRCTL_ADD_IF:
        case BRCTL_DEL_IF:
                return add_del_if(br, args[1], args[0] == BRCTL_ADD_IF);
        
        .....
        .....
        .....

        case BRCTL_GET_FDB_ENTRIES:
                return get_fdb_entries(br, (void __user *)args[1],
                                       args[2], args[3]);
        }

        return -EOPNOTSUPP;
}


總結:

SIOCDEVPRIVATE 的 private 的意思是由各 netdev 實作其想要的功能,而不是由 linux net core 提供。
每個 netdev 的 SIOCDEVPRIVATE 會透過 ifr.ifr_data 提供不同的功能。