#!/usr/bin/ruby
# require "nusdas/nusrecord"

=begin
= class Nusdas::NusRecStream

Inherits from ((<Ruby's built-in File|URL:http://www.ruby-lang.org/ja/man-1.6/?cmd=view;name=File>)).

Originally read-only; a put_record method for writing is added further
down in this file.
=end

module Nusdas
class NusRecStream < File

=begin
== Exceptions
:FmtError
  Subclass of IOError, raised when a record is truncated or malformed.
=end

    class FmtError < IOError
    end

=begin
== Class methods
--- self::new (file, mode = "r", perm = 0666)
    Opens the file like File.new and additionally switches it to binmode.
=end

    def initialize(file, mode = "r", perm = 0666)
        super
        binmode
        @tell_backspace = nil
    end

    # File offset at which the most recent get_record_str call started
    # reading (nil before the first call).
    attr_reader :tell_backspace

=begin
--- self::open (file, mode = "r", perm = 0666)
    Opens the file like File.open and additionally switches it to binmode.
    Without a block the stream is returned; with a block the block's value
    is returned (File.open has already closed the stream by then).
=end

    def self::open(filename, mode = "r", perm = 0666)
        result = super
        # With a block, `result` is the block's value, not a stream, so it
        # must not receive binmode (initialize already applied it anyway).
        return result if block_given?
        result.binmode
    end

=begin
== Instance methods
--- get_record_str
    Reads one record and returns the two-element array [header, payload].
    Returns nil at end of file.  Raises ((<FmtError>)) on a truncated or
    inconsistent record.
    * Record layout (all integers are 32-bit big-endian):
      * header (16 bytes)
        * record length in bytes; it counts the whole record including both
          length words
        * record kind: 4 bytes
        * valid record length
        * update time (a unix time value)
      * payload
        ((record length - 20) bytes are stored, of which only the first
        (valid record length - 12) bytes are meaningful and returned)
      * record length (repeated)
--- get_record
    Reads one record and returns a ((<NusRecord|URL:nusrecord.html>)) built
    from it.  Returns nil at end of file.  Raises ((<FmtError>)) on error.
    * Note (translated from the original documentation): when the same
      element has been written more than once, several DATA records exist
      and only the one referenced from the INDX record is valid.  Use
      ((<Nusdas::NusFile|URL:nusfile.html>)) to see valid DATA records only.
=end

    def get_record_str()
        # Header; remember where the record starts.
        @tell_backspace = self.tell
        head = read(16)
        return nil if head.nil?
        raise FmtError, "too short record" if head.length < 16

        # Payload: validate the physical length first, then cut the result
        # down to the valid length.
        recl, vrecl = head.unpack("Nx4N")
        stored = recl - 20
        vrecl -= 12
        if stored < 0 or vrecl < 0 then
            raise FmtError, "malformed record: bad length #{recl} #{vrecl + 12}"
        end
        body = read(stored)
        if body.nil? or body.length < stored then
            raise FmtError, "too short record: missing payload"
        end
        payload = body[0, vrecl]

        # Trailer must repeat the record length found in the header.
        tail = read(4)
        if tail.nil? or tail.length < 4 then
            raise(FmtError, "malformed record: missing tail")
        end
        tailed = tail.unpack('N').first
        if tailed != recl then
            raise(FmtError, "record length mismatch #{tailed} #{recl}")
        end
        [head, payload]
    end

    def get_record()
        # Loaded lazily; the file-level require of this library is
        # commented out at the top of the file.
        require "nusdas/nusrecord"
        head, payload = get_record_str()
        return nil if head.nil?
        NusRecord.new(head, payload)
    end

=begin
== Debugging methods
--- self::debug
    Runs get_record over every file named on the command line and prints
    each record with its starting offset.
=end

    def self::debug
        ARGV.each do |filename|
            input = NusRecStream::open(filename)
            begin
                loop do
                    itell = input.tell
                    r = input.get_record
                    break if r.nil?
                    print "#{sprintf '%04x', itell} #{r.inspect}\n"
                end
            ensure
                # Close the stream even when a record fails to parse.
                input.close
            end
        end
    end

end
end
# $Id: nusrecstream.rb,v 1.14 2003/02/10 07:57:08 suuchi43 Exp $
require 'narray'

module Nusdas

    class NusRecStream

        # Writes one record to the stream.
        #
        # kind:: record kind; space-padded or truncated to 4 bytes
        # pl::   payload string
        #
        # The record is a 16-byte header (record length, kind, valid
        # length, current unix time), the payload, and the record length
        # repeated.  Lengths are counted in bytes, so a payload carrying a
        # multibyte encoding still gets correct length words.
        def put_record(kind, pl)
            now = Time.now.to_i
            nbytes = pl.bytesize
            hdr = [nbytes + 20, kind, nbytes + 12, now].pack('NA4NN')
            write hdr
            write pl
            write([nbytes + 20].pack('N'))
        end

    end

end

# Metadata of one source file taking part in a merge: its NUSD/CNTL/INDX
# payloads, the counters read from NUSD, and the byte range of its data
# records.  Constructing an instance also feeds the file's SUBC and INFO
# records into the MrgInfo passed to the constructor.
class SrcInfo

    # Prints a hex dump of +str+ on standard output, 16 bytes per row.
    # Each row is followed by a row showing the same bytes as big-endian
    # 32-bit unsigned integers.  Consecutive all-zero rows are collapsed
    # into a single "*" line.
    def hexdump(str)
        # Work on bytes: on Ruby >= 1.9 indexing a String yields characters.
        data = str.dup
        data.force_encoding('BINARY') if data.respond_to?(:force_encoding)
        ofs = 0
        star = false
        while ofs < data.size
            buf = data[ofs, 16]
            if buf == "\0" * 16
                print "*\n" unless star
                star = true
                ofs += 16
                next
            end
            star = false
            pbuf = buf.gsub(/[^ -~]/, '.')
            fmt1 = format(" %08X  %23s - %23s  %-16s\n", ofs, '', '', pbuf)
            buf.unpack('C*').each_with_index do |byte, i|
                cofs = 11 + i * 3
                cofs += 2 if i >= 8     # skip the " - " column separator
                fmt1[cofs, 2] = format('%02X', byte)
            end
            print fmt1
            fmt2 = format(" %8d  %23s - %23s\n", ofs, '', '')
            (buf.size / 4).times do |i|
                word = buf[i * 4, 4].unpack('N').first
                cofs = 11 + i * 12
                cofs += 2 if i >= 2
                fmt2[cofs, 11] = format('%11u', word)
            end
            print fmt2
            ofs += 16
        end
    end

    # Merges two SUBC payloads of the same kind and returns the result.
    #
    # Identical payloads are returned as is.  For kinds TDIF, RADR and
    # ISPC the payloads are combined one 4-byte word at a time: an
    # all-zero word is treated as "unset" (as the original code did), a
    # word set in only one payload is taken over, and a word set
    # differently in both raises.  Any other kind with differing payloads
    # is dumped with hexdump and raises RuntimeError.
    #
    # Neither argument is modified.
    def subcmerge(a, b)
        return a if a == b
        kind = a[0..3]
        case kind
        when 'TDIF', 'RADR', 'ISPC'
            if b.length < a.length
                raise "SUBC merge: #{kind} size mismatch #{a.length} #{b.length}"
            end
            unset = "\0\0\0\0"
            merged = a.dup
            ofs = 4
            while ofs < merged.length
                theirs = b[ofs, 4]
                if theirs != unset
                    mine = merged[ofs, 4]
                    if mine != unset and theirs != mine
                        raise <<EOF
SUBC merge: inconsistent #{kind} #{(mine + theirs).unpack('NN').join(', ')}
EOF
                    end
                    # Take the value over into the result that is returned.
                    merged[ofs, 4] = theirs
                end
                ofs += 4
            end
            merged
        else
            print "--- 1st ---\n"
            hexdump(a)
            print "--- 2nd ---\n"
            hexdump(b)
            if ['ETA ', 'SIGM', 'Z*  '].include?(kind)
                raise "different SUBC #{kind} cannot be merged"
            end
            raise "unknown SUBC type #{kind}"
        end
    end

    # Reads the leading records of the file at +path+.
    #
    # path::    file to read
    # mrginfo:: merge state; its subctab and infotab receive this file's
    #           SUBC and INFO payloads, and its data_ofs is remembered as
    #           told_ofs
    #
    # If the file cannot be opened or its first record cannot be read, a
    # message is printed and :fmterror is thrown.  Every later
    # inconsistency raises (RuntimeError, or FmtError from the stream).
    # The file is closed on every exit path.
    def initialize(path, mrginfo)
        subctab = mrginfo.subctab
        @path = path
        @told_ofs = mrginfo.data_ofs
        fp = nil
        begin
            begin
                fp = Nusdas::NusRecStream.open(path)
                head, @nusd = fp.get_record_str()
            rescue StandardError => e
                print "Error #{e.message} #{e.backtrace.first}\n"
                print "#{path} is not a nusdas\n"
                throw :fmterror
            end
            raise "NUSD not found" unless head and head[4..7] == "NUSD"
            # Check the size before unpacking fixed offsets out of it.
            raise "NUSD size err #{@nusd.size}" if @nusd.size != 100
            @nrec, @ninfo, @nsubc = @nusd.unpack('x88N3')

            head, @cntl = fp.get_record_str()
            raise "CNTL not found" unless head and head[4..7] == "CNTL"
            raise "CNTL size err #{@cntl.size}" if @cntl.size < 52
            @n_dc, @n_vt, @n_lv, @n_el = @cntl[36, 16].unpack('N4')
            if @cntl.size != 156 + 4 * @n_dc + 8 * @n_vt + 12 * @n_lv + 6 * @n_el
                raise "CNTL size err #{@cntl.size}"
            end

            head, @indx = fp.get_record_str()
            raise "INDX not found" unless head and head[4..7] == "INDX"
            if @indx.size != 4 * @n_dc * @n_vt * @n_lv * @n_el
                raise "INDX size err #{@indx.size}"
            end

            scan_subc_info(fp, subctab, mrginfo.infotab)
            check_end_record(fp)
        ensure
            fp.close if fp and not fp.closed?
        end
    end

    attr_reader :path, :data_ofs, :endpos, :indx, :nusd, :cntl,
        :nrec, :ninfo, :nsubc, :n_dc, :n_vt, :n_lv, :n_el, :told_ofs

    # Returns the merged index as an array of integers.
    #
    # older::           index accumulated from earlier sources, or nil for
    #                   the first source
    # p_subcinfo_size:: total SUBC+INFO size of the merged file
    #
    # Entries equal to 0 or 0xFFFFFFFF are treated as "no data" and never
    # override anything.  Every other entry is relocated by the difference
    # between the merged and this file's SUBC/INFO size plus told_ofs.
    # The first source is relocated as well, because the merged SUBC/INFO
    # size may differ from its own.
    # NOTE(review): this treats INDX entries as absolute file offsets, which
    # is what the pre-existing relocation formula implies -- confirm.
    def indx_merge(older, p_subcinfo_size)
        newer = @indx.unpack('N*')
        shift = p_subcinfo_size - @subcinfo_size + @told_ofs
        if older.nil?
            return newer.map { |v| (v == 0 or v == 0xFFFFFFFF) ? v : v + shift }
        end
        older.each_index do |i|
            v = newer[i]
            next if v == 0 or v == 0xFFFFFFFF
            unless older[i] == 0 or older[i] == 0xFFFFFFFF
                STDERR.print "overwrite #{i}\n"
            end
            older[i] = v + shift
        end
        older
    end

    MAXCHUNK = 4 * 1024 * 1024

    # Copies the bytes from data_ofs up to endpos of this file to +ofp+,
    # in pieces of MAXCHUNK bytes (the final piece may be up to twice that).
    # Raises RuntimeError if the file ends early.
    def copydata(ofp)
        ifp = Nusdas::NusRecStream.open(@path, 'r')
        begin
            ifp.seek @data_ofs
            todo = @endpos - @data_ofs
            while todo > 0
                chunk = ((todo > MAXCHUNK * 2) ? MAXCHUNK : todo)
                data = ifp.read(chunk)
                if data.nil? or data.length < chunk
                    raise "#{@path}: unexpected end of file while copying data"
                end
                ofp.write(data)
                todo -= chunk
            end
        ensure
            ifp.close
        end
    end

    private

    # Reads the records after INDX, collecting SUBC and INFO payloads and
    # recording where the first other record starts (@data_ofs).
    def scan_subc_info(fp, subctab, infotab)
        w_info = @ninfo
        w_subc = @nsubc
        @data_ofs = nil
        @subcinfo_size = 0
        loop do
            rec_start = fp.tell
            head, payload = fp.get_record_str()
            break if head.nil?
            case rtype = head[4..7]
            when 'SUBC'
                subctype = payload[0..3]
                if subctab[subctype]
                    subctab[subctype] = subcmerge(subctab[subctype], payload)
                else
                    subctab[subctype] = payload
                end
                @subcinfo_size += (payload.size + 20)
                w_subc -= 1
            when 'INFO'
                infotab.push payload
                @subcinfo_size += (payload.size + 20)
                w_info -= 1
            when /^(NUSD|CNTL|INDX)$/
                raise "no more #{rtype}\n"
            else
                @data_ofs = rec_start if @data_ofs.nil?
                # Stop once every announced SUBC/INFO record has been seen.
                break if w_info + w_subc == 0
            end
        end
        raise "SUBC less than #{@nsubc}" if w_subc.nonzero?
        raise "INFO less than #{@ninfo}" if w_info.nonzero?
    end

    # Verifies that a 28-byte END record finishes at the file size stored
    # in NUSD and records its start offset in @endpos.
    def check_end_record(fp)
        @eofpos = @nusd[84, 4].unpack('N').first
        fp.seek(@eofpos - 4)
        endsize_str = fp.read(4)
        if endsize_str.nil? or endsize_str.length < 4
            raise "last record is broken"
        end
        endsize = endsize_str.unpack('N').first
        raise "END size err #{endsize}" if endsize != 28
        fp.seek(@eofpos - endsize)
        @endpos = fp.tell
        raise "last record is broken" unless fp.read(4) == endsize_str
        raise "last record is not END" unless fp.read(4) == 'END '
    end

end

# Accumulated state for one output file: the sources contributing to it,
# the shared NUSD/CNTL contents, the collected SUBC and INFO payloads, and
# running totals used to compute the merged file's size and record count.
class MrgInfo

    attr_reader :srcs, :relpath, :nusd_inv, :cntl, :infotab, :subctab,
        :data_ofs, :vttab

    # relpath:: path of the file relative to the dataset top directory
    def initialize(relpath)
        @relpath = relpath
        @srcs = Array.new
        @subctab = Hash.new
        @infotab = Array.new
        @vttab = Array.new
        @nusd_inv = @cntl = @indx = nil
        @md_nrec = 4
        @m_size = nil
        @data_ofs = 0
    end

    # Takes the invariant part of NUSD (its first 84 bytes) and CNTL from
    # the first source.
    def attach_first(srcinfo)
        @nusd_inv = srcinfo.nusd[0..83]
        @cntl = srcinfo.cntl
        # size of mandatory records (only NUSD, CNTL, and INDX);
        # 60 is three times the 20 bytes of per-record framing
        @m_size = srcinfo.nusd.size + srcinfo.cntl.size \
            + srcinfo.indx.size + 60
    end

    # Checks a further source against the first one.  A differing NUSD
    # prefix only produces a warning on STDERR; a differing CNTL raises.
    #
    # No SUBC handling happens here: SrcInfo#initialize merges SUBC
    # payloads straight into subctab while it reads the source.  (The loop
    # that used to stand here compared subctab with itself and therefore
    # never did anything.)
    def attach_nofirst(srcinfo)
        if srcinfo.nusd[0..83] != @nusd_inv
            STDERR.print "NUSD mismatch\n"
        end
        raise "CNTL mismatch" if srcinfo.cntl != @cntl
    end

    # Registers +srcinfo+ as a source and updates the data record count and
    # the accumulated data size.
    def attach(srcinfo)
        if @nusd_inv.nil?
            attach_first(srcinfo)
        else
            attach_nofirst(srcinfo)
        end
        @srcs.push srcinfo
        @md_nrec += (srcinfo.nrec - 4 - srcinfo.ninfo - srcinfo.nsubc)
        @data_ofs += (srcinfo.endpos - srcinfo.data_ofs)
    end

    # Total size in bytes of all INFO and SUBC records, including the 20
    # bytes of framing per record.
    def subcinfo_size
        total = 0
        infotab.each { |irec| total += (irec.size + 20) }
        subctab.each_value { |subcv| total += (subcv.size + 20) }
        total
    end

    # Size in bytes of the merged file.
    def size
        # END has 28 bytes.
        @m_size + subcinfo_size + @data_ofs + 28
    end

    # Number of records in the merged file.
    def nrec
        @md_nrec + infotab.size + subctab.size
    end

    # NUSD payload of the merged file: the invariant prefix followed by
    # file size, record count, INFO count and SUBC count.
    def nusd
        nusd_inv + [size, nrec, infotab.size, subctab.size].pack('N4')
    end

    # INDX payload of the merged file, built by folding indx_merge over all
    # sources in attach order.
    def indx
        sis = subcinfo_size
        ary = nil
        @srcs.each { |srcinfo| ary = srcinfo.indx_merge(ary, sis) }
        ary.pack('N*')
    end

    # END payload of the merged file: file size and record count.
    def endrec
        [size, nrec].pack('N2')
    end

end

# Maps each relative file path to the MrgInfo that collects all source
# files found under that path in the various top directories.
class FileTable < Hash

    # Reads the source file at +path+ and attaches it to the MrgInfo stored
    # under +relpath+, creating that entry if needed.  Returns true on
    # success and nil when SrcInfo throws :fmterror.
    def checkfile(relpath, path)
        catch(:fmterror) do
            mi = self[relpath] || MrgInfo.new(relpath)
            si = SrcInfo.new(path, mi)
            mi.attach(si)
            # Store a new entry only after the source was accepted.
            self[relpath] = mi
            true
        end
    end

    # Walks the tree below "#{topdir}#{relpath}" and calls checkfile for
    # every non-directory found.  Entries whose name starts with a dot and
    # the relative path '/nusdas_def' are skipped.
    #
    # The directory listing is read completely before descending, so no
    # directory handle stays open during (or after) the recursion.
    def search(topdir, relpath)
        path = "#{topdir}#{relpath}"
        return if relpath == '/nusdas_def'
        return checkfile(relpath, path) unless File.directory?(path)
        Dir.entries(path).each do |entry|
            next if /^\./ =~ entry
            search(topdir, "#{relpath}/#{entry}")
        end
        nil
    end

end

# Command-line driver: scans the given top directories, groups the files
# found by relative path, and writes one merged file per relative path
# below the output directory.
class NusMerge

    # Prints the usage text and terminates the process with status 1.
    def self::help
        print <<EOF
#{$0}: restructure NuSDaS datasets directory structure

usage:
	#{$0} [-d] [-o NRD|--output NRD] NRD [ ... ]

options:
	-d --debug	DEBUG mode

EOF
        exit 1
    end

    # Creates every missing ancestor directory of +to+ (but not +to+
    # itself), printing "mkdir DIR" for each directory created.  Results
    # are cached in @mkdir.
    #
    # File.dirname always ends at "." or "/", so the recursion terminates
    # even when the output directory itself does not exist yet.
    def mkdir_parent to
        parent = File.dirname(to)
        return if @mkdir[parent]
        return @mkdir[parent] = true if File::directory? parent
        mkdir_parent(parent) unless parent == to
        print "mkdir #{parent}\n"
        Dir::mkdir parent
        @mkdir[parent] = true
    end

    BAD = 65535

    # Writes the merged file for +relpath+: NUSD, CNTL, INDX, all SUBC and
    # INFO records, the data of every source in attach order, and END.
    # The output stream is closed even if writing fails.
    def mergefile(relpath)
        path = @outdir + relpath
        mkdir_parent path
        mi = @mrgtab[relpath]
        print "creating #{path} from\n"
        $stdout.flush if $VERBOSE

        ofp = Nusdas::NusRecStream.open(path, 'w')
        begin
            ofp.put_record 'NUSD', mi.nusd
            ofp.put_record 'CNTL', mi.cntl
            ofp.put_record 'INDX', mi.indx
            mi.subctab.each_value do |subcv|
                ofp.put_record 'SUBC', subcv
            end
            mi.infotab.each do |inforec|
                ofp.put_record 'INFO', inforec
            end
            mi.srcs.each do |si|
                print "\t#{si.path} #{si.told_ofs}\n" if $VERBOSE
                si.copydata(ofp)
            end
            ofp.put_record 'END ', mi.endrec
        ensure
            ofp.close unless ofp.closed?
        end
    end

    # outdir:: directory the merged files are written below
    # argv::   top directories (or files) to scan
    #
    # Scanning and merging both happen here.
    def initialize(outdir, argv)
        @outdir = outdir
        @mrgtab = FileTable::new
        @mkdir = Hash::new
        argv.each { |topdir| @mrgtab.search(topdir, '') }
        @mrgtab.each_key { |relpath| mergefile(relpath) }
    end

    # Parses ARGV and runs the merge.  Accepted options: -d/--debug,
    # -q/--quiet, --verbose, -c, -w (the last three are accepted and
    # ignored, as before) and -o/--output NRD.  Without an output
    # directory, or on any StandardError, the message is printed and help
    # exits with status 1.
    #
    # optparse replaces the 'getopts' library, which is no longer shipped
    # with Ruby.  As before, $VERBOSE is used as the verbosity flag.
    def self::main
        require 'optparse'
        outdir = nil
        quiet = false
        parser = OptionParser.new
        parser.on('-d', '--debug') { $DEBUG = true }
        parser.on('-q', '--quiet') { quiet = true }
        parser.on('--verbose') { }
        parser.on('-c') { }
        parser.on('-w') { }
        parser.on('-o NRD', '--output NRD') { |dir| outdir = dir }
        parser.on('-h', '--help') { help }
        parser.parse!(ARGV)
        $VERBOSE = (not quiet)
        help if outdir.nil?
        self.new(outdir, ARGV)
    rescue => e
        STDERR.print e.message, " at ", e.backtrace.first, "\n"
        print e.message, "\n"
        help
    end

end


# Script entry point: parse the command line, run the merge, and exit
# successfully.  (On errors NusMerge.main ends in NusMerge.help, which
# exits with status 1 before this exit is reached.)
NusMerge::main
exit(0)
