/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * ROUTE - implementation of the IP router. * * Version: @(#)route.c 1.0.14 05/31/93 * * Authors: Ross Biro, * Fred N. van Kempen, * Alan Cox, * Linus Torvalds, * * Fixes: * Alan Cox : Verify area fixes. * Alan Cox : cli() protects routing changes * Rui Oliveira : ICMP routing table updates * (rco@di.uminho.pt) Routing table insertion and update * Linus Torvalds : Rewrote bits to be sensible * Alan Cox : Added BSD route gw semantics * Alan Cox : Super /proc >4K * Alan Cox : MTU in route table * Alan Cox : MSS actually. Also added the window * clamper. * Sam Lantinga : Fixed route matching in rt_del() * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ip.h" #include "protocol.h" #include "route.h" #include "tcp.h" #include #include "sock.h" #include "icmp.h" /* * The routing table list */ static struct rtable *rt_base = NULL; /* * Pointer to the loopback route */ static struct rtable *rt_loopback = NULL; /* * Remove a routing table entry. */ static void rt_del(unsigned long dst, char *devname) { struct rtable *r, **rp; unsigned long flags; rp = &rt_base; /* * This must be done with interrupts off because we could take * an ICMP_REDIRECT. */ save_flags(flags); cli(); while((r = *rp) != NULL) { /* Make sure both the destination and the device match */ if ( r->rt_dst != dst || (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) ) { rp = &r->rt_next; continue; } *rp = r->rt_next; /* * If we delete the loopback route update its pointer. */ if (rt_loopback == r) rt_loopback = NULL; kfree_s(r, sizeof(struct rtable)); } restore_flags(flags); } /* * Remove all routing table entries for a device. This is called when * a device is downed. */ void ip_rt_flush(struct device *dev) { struct rtable *r; struct rtable **rp; unsigned long flags; rp = &rt_base; save_flags(flags); cli(); while ((r = *rp) != NULL) { if (r->rt_dev != dev) { rp = &r->rt_next; continue; } *rp = r->rt_next; if (rt_loopback == r) rt_loopback = NULL; kfree_s(r, sizeof(struct rtable)); } restore_flags(flags); } /* * Used by 'rt_add()' when we can't get the netmask any other way.. * * If the lower byte or two are zero, we guess the mask based on the * number of zero 8-bit net numbers, otherwise we use the "default" * masks judging by the destination address and our device netmask. */ static inline unsigned long default_mask(unsigned long dst) { dst = ntohl(dst); if (IN_CLASSA(dst)) return htonl(IN_CLASSA_NET); if (IN_CLASSB(dst)) return htonl(IN_CLASSB_NET); return htonl(IN_CLASSC_NET); } /* * If no mask is specified then generate a default entry. */ static unsigned long guess_mask(unsigned long dst, struct device * dev) { unsigned long mask; if (!dst) return 0; mask = default_mask(dst); if ((dst ^ dev->pa_addr) & mask) return mask; return dev->pa_mask; } /* * Find the route entry through which our gateway will be reached */ static inline struct device * get_gw_dev(unsigned long gw) { struct rtable * rt; for (rt = rt_base ; ; rt = rt->rt_next) { if (!rt) return NULL; if ((gw ^ rt->rt_dst) & rt->rt_mask) continue; /* * Gateways behind gateways are a no-no */ if (rt->rt_flags & RTF_GATEWAY) return NULL; return rt->rt_dev; } } /* * Rewrote rt_add(), as the old one was weird - Linus * * This routine is used to update the IP routing table, either * from the kernel (ICMP_REDIRECT) or via an ioctl call issued * by the superuser. */ void ip_rt_add(short flags, unsigned long dst, unsigned long mask, unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window) { struct rtable *r, *rt; struct rtable **rp; unsigned long cpuflags; /* * A host is a unique machine and has no network bits. */ if (flags & RTF_HOST) { mask = 0xffffffff; } /* * Calculate the network mask */ else if (!mask) { if (!((dst ^ dev->pa_addr) & dev->pa_mask)) { mask = dev->pa_mask; flags &= ~RTF_GATEWAY; if (flags & RTF_DYNAMIC) { /*printk("Dynamic route to my own net rejected\n");*/ return; } } else mask = guess_mask(dst, dev); dst &= mask; } /* * A gateway must be reachable and not a local address */ if (gw == dev->pa_addr) flags &= ~RTF_GATEWAY; if (flags & RTF_GATEWAY) { /* * Don't try to add a gateway we can't reach.. */ if (dev != get_gw_dev(gw)) return; flags |= RTF_GATEWAY; } else gw = 0; /* * Allocate an entry and fill it in. */ rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC); if (rt == NULL) { return; } memset(rt, 0, sizeof(struct rtable)); rt->rt_flags = flags | RTF_UP; rt->rt_dst = dst; rt->rt_dev = dev; rt->rt_gateway = gw; rt->rt_mask = mask; rt->rt_mss = dev->mtu - HEADER_SIZE; rt->rt_window = 0; /* Default is no clamping */ /* Are the MSS/Window valid ? */ if(rt->rt_flags & RTF_MSS) rt->rt_mss = mtu; if(rt->rt_flags & RTF_WINDOW) rt->rt_window = window; /* * What we have to do is loop though this until we have * found the first address which has a higher generality than * the one in rt. Then we can put rt in right before it. * The interrupts must be off for this process. */ save_flags(cpuflags); cli(); /* * Remove old route if we are getting a duplicate. */ rp = &rt_base; while ((r = *rp) != NULL) { if (r->rt_dst != dst || r->rt_mask != mask) { rp = &r->rt_next; continue; } *rp = r->rt_next; if (rt_loopback == r) rt_loopback = NULL; kfree_s(r, sizeof(struct rtable)); } /* * Add the new route */ rp = &rt_base; while ((r = *rp) != NULL) { if ((r->rt_mask & mask) != mask) break; rp = &r->rt_next; } rt->rt_next = r; *rp = rt; /* * Update the loopback route */ if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback) rt_loopback = rt; /* * Restore the interrupts and return */ restore_flags(cpuflags); return; } /* * Check if a mask is acceptable. */ static inline int bad_mask(unsigned long mask, unsigned long addr) { if (addr & (mask = ~mask)) return 1; mask = ntohl(mask); if (mask & (mask+1)) return 1; return 0; } /* * Process a route add request from the user */ static int rt_new(struct rtentry *r) { int err; char * devname; struct device * dev = NULL; unsigned long flags, daddr, mask, gw; /* * If a device is specified find it. */ if ((devname = r->rt_dev) != NULL) { err = getname(devname, &devname); if (err) return err; dev = dev_get(devname); putname(devname); if (!dev) return -EINVAL; } /* * If the device isn't INET, don't allow it */ if (r->rt_dst.sa_family != AF_INET) return -EAFNOSUPPORT; /* * Make local copies of the important bits */ flags = r->rt_flags; daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr; mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr; gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr; /* * BSD emulation: Permits route add someroute gw one-of-my-addresses * to indicate which iface. Not as clean as the nice Linux dev technique * but people keep using it... */ if (!dev && (flags & RTF_GATEWAY)) { struct device *dev2; for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) { if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) { flags &= ~RTF_GATEWAY; dev = dev2; break; } } } /* * Ignore faulty masks */ if (bad_mask(mask, daddr)) mask = 0; /* * Set the mask to nothing for host routes. */ if (flags & RTF_HOST) mask = 0xffffffff; else if (mask && r->rt_genmask.sa_family != AF_INET) return -EAFNOSUPPORT; /* * You can only gateway IP via IP.. */ if (flags & RTF_GATEWAY) { if (r->rt_gateway.sa_family != AF_INET) return -EAFNOSUPPORT; if (!dev) dev = get_gw_dev(gw); } else if (!dev) dev = ip_dev_check(daddr); /* * Unknown device. */ if (dev == NULL) return -ENETUNREACH; /* * Add the route */ ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window); return 0; } /* * Remove a route, as requested by the user. */ static int rt_kill(struct rtentry *r) { struct sockaddr_in *trg; char *devname; int err; trg = (struct sockaddr_in *) &r->rt_dst; if ((devname = r->rt_dev) != NULL) { err = getname(devname, &devname); if (err) return err; } rt_del(trg->sin_addr.s_addr, devname); if ( devname != NULL ) putname(devname); return 0; } /* * Called from the PROCfs module. This outputs /proc/net/route. */ int rt_get_info(char *buffer, char **start, off_t offset, int length) { struct rtable *r; int len=0; off_t pos=0; off_t begin=0; int size; len += sprintf(buffer, "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\n"); pos=len; /* * This isn't quite right -- r->rt_dst is a struct! */ for (r = rt_base; r != NULL; r = r->rt_next) { size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\n", r->rt_dev->name, r->rt_dst, r->rt_gateway, r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric, r->rt_mask, (int)r->rt_mss, r->rt_window); len+=size; pos+=size; if(posoffset+length) break; } *start=buffer+(offset-begin); len-=(offset-begin); if(len>length) len=length; return len; } /* * This is hackish, but results in better code. Use "-S" to see why. */ #define early_out ({ goto no_route; 1; }) /* * Route a packet. This needs to be fairly quick. Florian & Co. * suggested a unified ARP and IP routing cache. Done right its * probably a brilliant idea. I'd actually suggest a unified * ARP/IP routing/Socket pointer cache. Volunteers welcome */ struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr) { struct rtable *rt; for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) { if (!((rt->rt_dst ^ daddr) & rt->rt_mask)) break; /* * broadcast addresses can be special cases.. */ if (rt->rt_flags & RTF_GATEWAY) continue; if ((rt->rt_dev->flags & IFF_BROADCAST) && (rt->rt_dev->pa_brdaddr == daddr)) break; } if(src_addr!=NULL) *src_addr= rt->rt_dev->pa_addr; if (daddr == rt->rt_dev->pa_addr) { if ((rt = rt_loopback) == NULL) goto no_route; } rt->rt_use++; return rt; no_route: return NULL; } struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr) { struct rtable *rt; for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) { /* * No routed addressing. */ if (rt->rt_flags&RTF_GATEWAY) continue; if (!((rt->rt_dst ^ daddr) & rt->rt_mask)) break; /* * broadcast addresses can be special cases.. */ if ((rt->rt_dev->flags & IFF_BROADCAST) && rt->rt_dev->pa_brdaddr == daddr) break; } if(src_addr!=NULL) *src_addr= rt->rt_dev->pa_addr; if (daddr == rt->rt_dev->pa_addr) { if ((rt = rt_loopback) == NULL) goto no_route; } rt->rt_use++; return rt; no_route: return NULL; } /* * Backwards compatibility */ static int ip_get_old_rtent(struct old_rtentry * src, struct rtentry * rt) { int err; struct old_rtentry tmp; err=verify_area(VERIFY_READ, src, sizeof(*src)); if (err) return err; memcpy_fromfs(&tmp, src, sizeof(*src)); memset(rt, 0, sizeof(*rt)); rt->rt_dst = tmp.rt_dst; rt->rt_gateway = tmp.rt_gateway; rt->rt_genmask.sa_family = AF_INET; ((struct sockaddr_in *) &rt->rt_genmask)->sin_addr.s_addr = tmp.rt_genmask; rt->rt_flags = tmp.rt_flags; rt->rt_dev = tmp.rt_dev; printk("Warning: obsolete routing request made.\n"); return 0; } /* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ int ip_rt_ioctl(unsigned int cmd, void *arg) { int err; struct rtentry rt; switch(cmd) { case SIOCADDRTOLD: /* Old style add route */ case SIOCDELRTOLD: /* Old style delete route */ if (!suser()) return -EPERM; err = ip_get_old_rtent((struct old_rtentry *) arg, &rt); if (err) return err; return (cmd == SIOCDELRTOLD) ? rt_kill(&rt) : rt_new(&rt); case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!suser()) return -EPERM; err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry)); if (err) return err; memcpy_fromfs(&rt, arg, sizeof(struct rtentry)); return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt); } return -EINVAL; }