#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2007 MINAMI Hirokazu <minami@pylone.jp>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 as published by
# the Free Software Foundation.
"""dependency resolver for initramfs"""

import os
import sys
import re
import getopt

############################################################
def print_help():
    """print usage"""
    sys.stderr.write("usage: %s [-l listfile] ROOT \n" \
        "  -l listfile : list of filenames\n" \
        "  ROOT        : root of extracted files\n" % sys.argv[0])
    sys.exit(0)

def check_deplib(path):
    """return a list of needed libraries for a executable """
    result = []

    for skip_suffix in [".py", ".pyc", ".sh", ".rc"]:
        if path.endswith(skip_suffix):
            return []

    (_stdin, stdouterr) = os.popen4( ("readelf", "-d", path) )
    re_pat = re.compile(".+? \(NEEDED\).+?: \[(.+)\]$")
    for line in stdouterr:
        libname = re_pat.match(line)
        if libname:
            result.append(libname.group(1))
    return result


def guess_nodeinfo(name):
    """ guess device-node's info from its name."""
    # from linux-source/Docuements/devices.txt
    full_match = {
        "mem"      : ('c',   1,   1),
        "kmem"     : ('c',   1,   2),
        "null"     : ('c',   1,   3),
        "port"     : ('c',   1,   4),
        "zero"     : ('c',   1,   5),
        "core"     : ('c',   1,   6),
        "full"     : ('c',   1,   7),
        "random"   : ('c',   1,   8),
        "urandom"  : ('c',   1,   9),
        "aio"      : ('c',   1,  10),
        "kmsg"     : ('c',   1,  11),
        "oldmem"   : ('c',   1,  12),
        "initrd"   : ('b',   1, 250),
        "tty"      : ('c',   5,   0),
        "console"  : ('c',   5,   1),
        "ptmx"     : ('c',   5,   2),
        "vcs"      : ('c',   7,   0),
        "psaux"    : ('c',  10,   1),
        "beep"     : ('b',  10, 128),
        "watchdog" : ('c',  10, 130),
        "rtc"      : ('c',  10, 135),
        "fuse"     : ('c',  10, 229),
        "mixer"    : ('c',  14,   0),
        "sequencer": ('c',  14,   1),
        "dsp"      : ('c',  14,   3),
        "route"    : ('c',  36,   0),
        "skip"     : ('c',  36,   1),
        "fwmonitor": ('c',  36,   3),
        "audit"    : ('c', 103,   0),
        "ppp"      : ('c', 108,   0),
        "input/mouse"   : ('c', 10, 149),
        "input/keyboard": ('c', 10, 150),
        "input/uinput"  : ('c', 10, 223),
        "input/mice"    : ('c', 13,  63),
        }.setdefault(name, None)
    if full_match:
        return full_match

    def parse_ata(code, part):
        """parse a node name for ATA"""
        (major, base) = {
            "a": ( 4,  0),
            "b": ( 4, 64),
            "c": (22,  0),
            "d": (22, 64),
            "e": (33,  0),
            "f": (33, 64),
            "g": (34,  0),
            "h": (34, 64),
            "i": (56,  0),
            "j": (56, 64),
            "k": (57,  0),
            "l": (57, 64),
            "m": (88,  0),
            "n": (88, 64),
            "o": (89,  0),
            "p": (89, 64),
            "q": (90,  0),
            "r": (90, 64),
            "s": (91,  0),
            "t": (91, 64),
            }[code]
        return ('b', major, base + part)

    def parse_scsi(code, part):
        """parse a node name for SCSI"""
        (major, base) = {
            "a"  : (  8,   0), #
            "b"  : (  8,  16), # 
            "c"  : (  8,  32), # 
            "d"  : (  8,  48), # 
            "e"  : (  8,  64), #
            "f"  : (  8,  80), # 
            "g"  : (  8,  96), # 
            "h"  : (  8, 112), # 
            "i"  : (  8, 128), #
            "j"  : (  8, 144), # 
            "k"  : (  8, 160), # 
            "l"  : (  8, 176), # 
            "m"  : (  8, 192), #
            "n"  : (  8, 208), # 
            "o"  : (  8, 224), # 
            "p"  : (  8, 240), # 
            }[code]
        return ('b', major, base + part)

    def parse_tty(code, part):
        """parse a node name for TTY"""
        (major, base) = {
            "p"  : (  3,  0), # old-style TTYs
            ""   : (  4,  0), # 
            "S"  : (  4, 64), # UART
            "USB": (188,  0), # USB
            }[code]
        return ('c', major, base + part)

    def parse_mtd(code, part):
        """parse a node name for MTD"""
        (major, base) = {
            "r"  : ( 99,  0), #
            ""   : ( 99,  1), # 
            }[code]
        return ('c', major, base + part * 2)
        
    splitter = re.compile("([a-z]*)([0-9]*)")
    by_prefix = {
        "hd"      : parse_ata,
        "sd"      : parse_scsi,
        "tty"     : parse_tty,
        "mtd"     : parse_mtd,
        "ram"     : (lambda s, n: ('b',   1, n)),
        "fd"      : (lambda s, n: ('b',   2, n + (n > 3) * 124)),
        "ptyp"    : (lambda s, n: ('c',   2, n)), # old-style TTYm
        "lp"      : (lambda s, n: ('c',   6, n)),
        "loop"    : (lambda s, n: ('b',   7, n)),
        "vcs"     : (lambda s, n: ('c',   7, n)),
        "md"      : (lambda s, n: ('b',   9, n)),
        "scd"     : (lambda s, n: ('b',  11, n)),
        "input/js"   : (lambda s, n: ('c',  13, n)),
        "input/mouse": (lambda s, n: ('c',  13, n + 32)),
        "input/event": (lambda s, n: ('c',  13, n + 64)),
        "sg"      : (lambda s, n: ('c',  21, n)),
        "rom"     : (lambda s, n: ('b',  31, n)),
        "rrom"    : (lambda s, n: ('b',  31, n + 7)),
        "flash"   : (lambda s, n: ('b',  31, n + 14)),
        "rflash"  : (lambda s, n: ('b',  31, n + 21)),
        "tap"     : (lambda s, n: ('b',  36, n + 16)),
        "video"   : (lambda s, n: ('c',  81, n)),
        "radio"   : (lambda s, n: ('c',  81, n + 64)),
        "vtx"     : (lambda s, n: ('c',  81, n + 128)),
        "vbi"     : (lambda s, n: ('c',  81, n + 192)),
        "parport" : (lambda s, n: ('c',  99, n)),
        }

    for prefix in by_prefix:
        if name.startswith(prefix):
            match = splitter.match(name[len(prefix):])
            if match:
                as_num = int(match.group(2) or "0") 
                return by_prefix[prefix](match.group(1), as_num)
    # events, sound, FB...
    raise ValueError(name)

##############################################################################
class Registry(object):
    """file dependency registry"""
    def __init__(self, topdir):
        if not topdir:
            raise ValueError(topdir)
        self._src = {}
        omit = len(topdir)
        for (dirpath, _dirnames, filenames) in os.walk(topdir):
            for filename in filenames:
                self._src.setdefault(filename, []).append(dirpath[omit:])

        self._links = {# (from_dir, from_fname) -> to_path
            #should exist
            ("/dev","fd")    :"/proc/self/fd",
            ("/dev","stdin") :"fd/0",
            ("/dev","stdout"):"fd/1",
            ("/dev","stderr"):"fd/2",
            }
        self._referrer = {}
        self._files = {} # (dir, fname) -> [for_what]
        self._dirs = set([ 
            # always create these dirs
            "/bin",
            "/dev",
            "/dev/input",
            "/dev/pts",
            "/dev/shm",
            "/dev/watchdogs",
            "/etc",
            "/mnt",
            "/proc",
            "/root",
            "/sbin",
            "/sys",
            "/tmp",
            "/usr",
            "/usr/bin",
            "/usr/lib",
            "/var",
            "/var/lib",
            "/var/log",
            "/var/run",
            ])
        self._nodes = {}
        for node in [
            "null",
            "zero",
            "tty",
            "tty0",
            "ttyS0",
            "console",
            "hda",
            "hda1",
            "sda",
            "sda1",
            "vcs",
            "vcs1",
            "vcs2",
            "vcs7",
            ]:
            self.register_node(node)
        self._top = topdir

    def gather_info(self, filelist):
        """collect info for dependencies"""
        self._wanted = set()
        for cand in filelist:
            cand = cand.rstrip("\r\n")
            if not cand or cand.startswith("#"):
                continue
            dname =  os.path.dirname(cand)
            basename = os.path.basename(cand)

            if dname.startswith("/dev"):
                #try to register as a device node
                if self.register_node(basename):
                    continue
            dname = dname or self.guess_dir(basename)
            self.resolve_dep(dname, basename, "*")
            self._wanted.add(dname + '/' +basename)

    def guess_dir(self, fname, preferred = None):
        """guess file location from its name (ex. libc.so->'/lib')"""
        preferred = preferred or ["bin", "lib"]
        if fname.endswith("/"):
            return fname[:-1]
        dirs = self._src[os.path.basename(fname)]
        # if multiple dirs were found, use common sense
        for substr in preferred:
            for dname in dirs:
                if substr in dname:
                    return dname
        return dirs[0]

    def register_dir(self, path):
        """register a directory"""
        if path in self._dirs:
            return
        if not path.endswith("/"):
            self._dirs.add(path)
        parent = os.path.dirname(path)
        if parent and (parent != '/'):
            self.register_dir(parent)

    def dump_dirs(self):
        """print directories"""
        for name in sorted(self._dirs):
            print "dir %s 755 0 0" % name

    def is_known_dir(self, name):
        """check a directory is going to be included"""
        return (name in self._dirs)

    def register_symlink(self, src, dest):
        """register a symbolic link"""
        if src in self._links:
            return
        (dirname, path) = src
        self._links[(dirname, path)] = dest

    def dump_symlinks(self):
        """print symbolic links"""
        for name in sorted(self._links):
            # ignore symlink if a file was also registered
            # (i.e. prefer real binary over busybox applets)
            if name in self._files:
                continue
            (dirn, basen) = name
            print "slink %s/%s %s 777 0 0" % (dirn, basen, self._links[name])

    def register_node(self, name):
        """register device node name"""
        if(name in self._nodes):
            raise ValueError(name)
        nodeinfo = guess_nodeinfo(name)
        if not nodeinfo:
            return None
        (b_or_c, major, minor) = nodeinfo
        self._nodes[name] = (b_or_c, major, minor)
        return (b_or_c, major, minor)

    def dump_nodes(self):
        """print nodes"""
        for name in sorted(self._nodes):
            (b_or_c, major, minor) = self._nodes[name]
            print "nod /dev/%s 660 0 0 %s %d %d" % (name, b_or_c, major, minor)

    def register_file(self, loc, referrer):
        """register a file"""
        (dname, path) = loc
        self._files.setdefault((dname, path), set()).add(referrer)

    def dump_files(self):
        """print files"""
        for (dirname, basename) in sorted(self._files):
            #deps = [p for p in self._files[(dirname, basename)] if p !="*"]
            #if deps:
            #    print "#!# for [", ", ".join(deps),"]"
            relpath = dirname + "/" + basename
            print "file %s %s 755 0 0 " % (relpath, self._top + relpath)

    def process_busybox(self):
        """ create symlinks for all busybox applets."""
        may_busybox = [(d, b) for (d, b) in self._files if b =='busybox']
        if not may_busybox:
            return
        self.register_symlink(("","init"), "/sbin/init")
        ##  this list may not match your busybox...
        for link_to in """
        [, [[, addgroup, adduser, adjtimex, ar, arping, ash, awk, basename,
        bunzip2, busybox, bzcat, cal, cat, chgrp, chmod, chown, chroot,
        chvt, clear, cmp, cp, cpio, crond, crontab, cut, date, dc, dd,
        deallocvt, delgroup, deluser, df, dirname, dmesg, dos2unix, dpkg,
        dpkg-deb, du, dumpkmap, dumpleases, echo, egrep, env, expr, false,
        fbset, fdflush, fdisk, fgrep, find, fold, free, freeramdisk,
        fsck.minix, ftpget, ftpput, getopt, getty, grep, gunzip, gzip,
        halt, head, hexdump, hostid, hostname, httpd, hwclock, id, ifconfig,
        ifdown, ifup, init, ip, ipaddr, ipcalc, iplink, iproute, iptunnel,
        kill, killall, klogd, last, length, linuxrc, ln, loadfont, loadkmap,
        logger, login, logname, logread, losetup, ls, makedevs, md5sum,
        mesg, mkdir, mkfifo, mkfs.minix, mknod, mkswap, mktemp, more,
        mount, mt, mv, nameif, nc, netstat, nslookup, od, openvt, passwd,
        patch, pidof, ping, ping6, pivot_root, poweroff, printf, ps,
        pwd, rdate, readlink, realpath, reboot, renice, reset, rm, rmdir,
        route, rpm, rpm2cpio, run-parts, sed, setkeycodes, sh, sha1sum,
        sleep, sort, start-stop-daemon, strings, stty, su, sulogin, swapoff,
        swapon, switch_root, sync, syslogd, tail, tar, tee, telnet, telnetd,
        test, tftp, time, top, touch, tr, traceroute, true, tty, udhcpc,
        udhcpd, umount, uname, uncompress, uniq, unix2dos, unzip, uptime,
        usleep, uudecode, uuencode, vi, vloc""".replace(",","").split():
            # cannot link busybox to busybox itself
            if not link_to.endswith("busybox"):
                # create link under /bin *and* /sbin
                for dest in ("/bin", "/sbin"):
                    self.register_symlink((dest, link_to), "/bin/busybox")

    def process_pythonmodules(self):
        """mimic postinst for python modules """
        base_dirs = []
        for pyver in ("2.4","2.5"):
            pylib = "/usr/lib/python%s/site-packages" % pyver
            if self.is_known_dir(pylib):
                base_dirs.append(pylib)
        if not base_dirs:
            return
        self.process_pycentral(base_dirs)
        self.process_pythonsupport(base_dirs)

    PYCENTRAL_PAT = re.compile("^/usr/share/pycentral/.*?/site-packages/(.*)")
    def process_pycentral(self, base_dirs):
        """create links for pycentral managed python modules"""
        for (dirname, basename) in self._files.keys():
            mod = self.PYCENTRAL_PAT.match(dirname)
            if mod and basename.endswith(".py"):
                for base_dir in base_dirs:
                    destdir = base_dir + "/" + mod.group(1)
                    self.register_symlink((destdir, basename),
                                          dirname + "/" +basename)
                    self.register_dir(destdir)

    PSUPPORT_PAT = re.compile("^/usr/share/python-support/.*?/(.*)")
    def process_pythonsupport(self, base_dirs):
        """create links for python-support managed python modules"""
        for (dirname, basename) in self._files.keys():
            mod = self.PSUPPORT_PAT.match(dirname)
            if mod and basename.endswith(".py"):
                for base_dir in base_dirs:
                    destdir = base_dir + "/" + mod.group(1)
                    self.register_symlink((destdir, basename),
                                          dirname + "/" +basename)
                    self.register_dir(destdir)

    def resolve_dep(self, dname, path, referrer):
        """register a file and its depends"""
        if (dname, path) in self._links:
            return
        self.register_dir(dname)

        relpath = dname + "/" + path
        relpath = relpath.rstrip('/')

        fullpath = self._top + relpath
        # symlink
        if os.path.islink(fullpath):
            linkto = os.readlink(fullpath)
            self.register_symlink((dname, path), linkto)
            self.resolve_dep(os.path.dirname(linkto) or dname,
                            os.path.basename(linkto),
                            dname + "/" + path)
        # normal file
        elif os.path.isfile(fullpath):
            needed = check_deplib(fullpath)
            for dep in needed:
                try:
                    dep_dir = self.guess_dir(dep, ["lib"])
                except KeyError:
                    raise Exception(" %s not found (required by %s)",
                                    dep, path)
                self._referrer.setdefault(dep_dir+'/'+dep,set()).add(relpath)
                self.resolve_dep(dep_dir, dep, dname + "/"+path)
            self.register_file( (dname, path), referrer)
        # directory
        elif os.path.isdir(fullpath):
            for blacklisted in [".egg-info"]:
                if relpath.endswith(blacklisted):
                    return
            for ent in os.listdir(fullpath):
                self.resolve_dep(relpath, ent, dname + "/" + path )
        else:
            if dname.startswith("/proc"):
                return # files under /proc may be dynamically created
            raise ValueError("not found: " + fullpath)

    def gen_script(self, requested):
        """generate the contents of script for INITRAMFS"""
        print "### scanned `under '%s'" % self._top
        self.gather_info(requested)
        self.process_busybox()
        self.process_pythonmodules()

        print "##DIRS"
        self.dump_dirs()

        print "##DEVICE NODES"
        self.dump_nodes()

        print "##FILES"
        self.dump_files()

        print "##SYMLINKS"
        self.dump_symlinks()

    def graph_add_edge(self, graph, dest, src, decor = None):       
        if not '/' in dest:
            dest = os.path.dirname(src) + "/" + dest
#        if graph.has_edge(src,dest):
#            return;
        if not (src in graph):
            graph.add_node(src)
        if not (dest in graph):
            graph.add_node(dest)
        graph.add_edge(graph.get_node(src),
                       graph.get_node(dest))
        e = graph.get_edge(graph.get_node(src),
                           graph.get_node(dest))
        if decor:
            e.attr['color'] = decor
        
    def graph_symlinks(self, graph):
        """draw symbolic links"""
        for name in self._links:
            if name in self._files:
                continue
            linkfrom = self._links[name]
            if linkfrom == "/bin/busybox":
                continue
            linkto = "%s/%s" % (name[0], name[1])
            self.graph_add_edge(graph, linkfrom, linkto, 'green')
        for fullpath in self._referrer:
            for dest in self._referrer[fullpath]:
                self.graph_add_edge(graph, fullpath, dest, 'gray')

    def graph_files(self, graph):
        """draw files"""
        for (dirname, basename) in self._files:
            deps = [p for p in self._files[(dirname, basename)] if p !="*"]

    def graph_decor(self, graph):
        for path in self._wanted:
            if path in graph:
                n = graph.get_node(path)
                n.attr['color'] = 'red'

#    def gen_graph(self):
#        import pygraphviz
#        G=pygraphviz.AGraph(strict=False,directed=True)
#        self.graph_symlinks(G)
#        self.graph_files(G)
#        self.graph_decor(G)
#        G.draw('file.png', prog='dot')
#        G.draw('file.svg', prog='dot')


##############################################################################
if len(sys.argv) == 0:
    print_help()

WANTED = []
OPTS, ARGS = getopt.getopt(sys.argv[1:], "l:h")
if not ARGS:
    print_help()
for (o, v) in OPTS:
    if o == "-h":
        print_help()
    elif(o == "-l"):
        try:
            WANTED = open(v)
            print "### file list from '%s'" % v
        except IOError:
            print_help()

if not WANTED:
    print "### file list from stdin" % v
    WANTED= [l for l in sys.stdin]

r = Registry(ARGS[0])
r.gen_script(WANTED)
#r.gen_graph()

