#!/usr/local/bin/ruby
# -*- coding: euc-jp -*-
# $HGid: mkjindex,v 839:f21b92d995e1 2018-02-21 08:52 +0900 yuuji $
# Last modified Thu Jan  4 12:39:13 2018 on firestorm
#
# \indexentry{漢字} を kakasi/chasen を用いて \indexentry{かんじ@漢字}
# に変換し、makeindex に渡して *.ind を作らせる。

# chasen 不調(2018-01-04)

# Character codes used at each stage.  Only the first letter of each
# name is ever used, to build nkf / kakasi option flags.
code4ruby = 'euc'        # code this script itself works in
# $KCODE = code4ruby
# makeindexcode = "sjis"
makeindexcode = "euc"    # code fed to the index processor
latexcode = "jis"        # code of the files handed back to LaTeX

# External commands.
kakasi = "kakasi -JH -KH -o#{code4ruby[0, 1]}"
chasen = 'chasen -F %y'
# makeindex = 'makeindex'
makeindex = 'makeindex2'
# require 'jcode'

# Reading generator used unless -c selects chasen.
kanafilter = kakasi

# Consume leading command-line options; the first argument that does not
# look like an option is left in ARGV for the code that follows.
#   -s / -e / -j   recorded in `filter` (not read anywhere else in this script)
#   -c             use chasen instead of kakasi to produce readings
#   -m CMD         index processor command to run
#   -mendex        use "mendex -Ef" and feed it EUC
#   -mc CODE       character code to feed the index processor
# Unrecognised options are dropped silently.
while /^-/ =~ ARGV[0]
  option = ARGV.shift
  case option
  when /^-[sej]$/
    filter = "nkf #{$&}"
  when "-c"
    kanafilter = chasen
  when "-m"
    makeindex = ARGV.shift
  when "-mendex"
    makeindex = 'mendex -Ef'
    makeindexcode = 'e'
  when '-mc'
    makeindexcode = ARGV.shift
  end
end

# Return a copy of +str+ with katakana in the range ァ..ン replaced by the
# corresponding hiragana ぁ..ん.  Every other character is left untouched.
def hira(str)
  katakana = "ァ-ン"
  hiragana = "ぁ-ん"
  str.tr(katakana, hiragana)
end

# Return a copy of +str+ in which every voiced or semi-voiced hiragana
# (が..ぽ) is replaced by its plain counterpart (か..ほ).
def purify(str)
  marked = "がぎぐげご" "ざじずぜぞ" "だぢづでど" "ばびぶべぼ" "ぱぴぷぺぽ"
  plain  = "かきくけこ" "さしすせそ" "たちつてと" "はひふへほ" "はひふへほ"
  str.tr(marked, plain)
end

# Map the leading character of an index word to the heading it is filed
# under and return the result as a new string.
#
# The rules are applied in order, each replacing at most the first match:
# anything that is not an ASCII letter, a digit or hiragana in あ..ん becomes
# 記号, a digit becomes 数字, and a kana becomes the first kana of its row.
# ASCII letters pass through unchanged.  The caller is expected to have
# removed voicing marks already (see the call site, which applies purify).
#
# +str+ is never modified, so frozen strings are accepted.
def gyou(str)
  rules = [
    [/[^A-Za-zあ-ん0-9]/, '記号'],
    [/[0-9]/,             '数字'],
    [/[いうえお]/,        'あ'],
    [/[きくけこ]/,        'か'],
    [/[しすせそ]/,        'さ'],
    [/[ちつてと]/,        'た'],
    [/[にぬねの]/,        'な'],
    [/[ひふへほ]/,        'は'],
    [/[みむめも]/,        'ま'],
    [/[ゆよ]/,            'や'],
    [/[りるれろ]/,        'ら'],
    [/[を]/,              'わ']
  ]
  # String#sub (not sub!) returns a fresh string at every step, so the
  # argument itself is left alone.
  rules.inject(str) { |heading, (pattern, row)| heading.sub(pattern, row) }
end

# The first non-option argument is the index file to convert.
indexfile = ARGV[0]
# Without it the .sub call below would fail with NoMethodError on nil,
# so stop early with a usage line instead.
unless indexfile
  abort "usage: #{File.basename($0)} [-c] [-m CMD] [-mendex] [-mc CODE] INDEXFILE"
end

# Output goes next to the input with its extension replaced by .ind
# (".ind" is simply appended when the name has no extension).
newindexfile = indexfile.sub(/\.\w+$/, '') + ".ind"
# Intermediate file: a fixed name in the current directory, which this
# script does not remove afterwards.
tmpfile = "tmptmp" # "/tmp/mkji$$"

# nkf invocations; the flag is the first letter of each code name.
prenkf   = "nkf -#{code4ruby[0, 1]}"      # input      -> script's code
mkidxnkf = "nkf -#{makeindexcode[0, 1]}"  # script     -> index processor's code
postnkf  = "nkf -#{latexcode[0, 1]}"      # processor  -> LaTeX's code

# Pass 1: give every \indexentry a kana sort key and run the index processor.
#
# The index file is read twice in lockstep: once as-is (jpindex) and once
# through the kana filter (kanaindex).  When the reading differs from the
# original entry, the entry is rewritten as "reading@entry"; otherwise the
# line is passed through untouched.  The result is piped through the index
# processor into tmpfile.
#
# IO.popen is used rather than Kernel#open("| cmd"): spawning a subprocess
# through a leading "|" is deprecated since Ruby 3.3.
entry_pattern = /\\indexentry\{(.*)\}\{(\d+)\}/

IO.popen("#{prenkf} #{indexfile}", "r") do |jpindex|
  IO.popen("#{prenkf} #{indexfile} | #{kanafilter}", "r") do |kanaindex|
    IO.popen("#{mkidxnkf} | #{makeindex} | #{postnkf} > #{tmpfile}", "w") do |w|
      while line = jpindex.gets
        kana = kanaindex.gets
        # The two streams are consumed line for line; if the filter produced
        # fewer lines (for instance because it failed to start), say so
        # instead of failing inside hira with NoMethodError on nil.
        abort "#{kanafilter}: produced fewer lines than #{indexfile}" unless kana
        nl = hira(kana)
        if $DEBUG
          STDERR.printf "line=%s\n", line
          STDERR.printf "nl=%s\n", nl
          STDERR.printf "hiranl=%s\n", nl = hira(nl)
        end
        entry, pages = line.scan(entry_pattern)[0]
        newent = nl.scan(entry_pattern)[0]
        # The comparison is made on the reading with voicing marks removed,
        # but the unmodified reading is what gets written as the sort key.
        if newent && entry != purify(newent[0])
          w.print "\\indexentry{#{newent[0]}@#{entry}}{#{pages}}\n"
        else
          w.print line
        end
      end
    end
  end
end

# Pass 2: copy tmpfile to the final .ind file, inserting a heading line
# "\item 【X】" whenever the heading of the current \item changes.
#
# tmpfile is read twice in lockstep: tread supplies the lines that are
# written out, tmp2 supplies the same lines run through the kana filter so
# that the first character of each item can be classified by gyou.
#
# IO.popen is used rather than Kernel#open("| cmd"): spawning a subprocess
# through a leading "|" is deprecated since Ruby 3.3.
lastchr = ''
item_pattern = /\\item\s*((\S).*),/

IO.popen("#{prenkf} #{tmpfile}", "r") do |tread|
  IO.popen("#{kanafilter} < #{tmpfile} | #{prenkf}", "r") do |tmp2|
    IO.popen("#{postnkf} > #{newindexfile}", "w") do |out|
      while line = tread.gets
        hr = tmp2.gets
        # hr is nil once tmp2 is exhausted; =~ then yields nil and the
        # heading step is skipped.
        if item_pattern =~ hr
          # $2 is the first non-blank character after \item.
          char = gyou(purify(hira($2.upcase)))
          if char != lastchr
            out.puts "  \\item 【#{char}】"
            lastchr = char
          end
        end
        # Items that begin with a TeX special character are wrapped in \verb,...,
        ## line.sub!(/(\\item\s+)([&%\#~_^{}\$\\].*)/, "\\1\\verb,\\2,")
        line.sub!(/(\\item\s+)([&%\#~_^{}\$].*)/, "\\1\\verb,\\2,")
        out.print line
      end
    end
  end
end

__END__

以下 jperl 版(kakasiのみ)

#!/usr/local/bin/jperl -Leuc
#
# \indexentry{漢字} を kakasi を用いて \indexentry{かんじ@漢字}
# に変換し、makeindex に渡して *.ind を作らせる。

$perlcode	= "euc";
$makeindexcode	= "sjis";
$LaTeXcode	= "jis";
$kakasiopt	= "-JH -KH";
$makeindex	= 'makeindex';


while ($ARGV[0] =~ '^-') {
    $_ = $ARGV[0];
    if (/-[sej]/) {
        $filter	= "nkf $_";
    } else {
        $kakasiopt	= "$kakasiopt $_";
    }
    shift;
}

$indexfile	= $ARGV[0];
$newindexfile	= substr($indexfile, 0, rindex($indexfile, '.')) . ".ind";
$tmpfile	= "tmptmp"; #"/tmp/mkji$$";
$prenkf		= "nkf -" . substr($perlcode, 0, 1);
$kakasiopt	= "$kakasiopt -o$perlcode";
$mkidxnkf	= "nkf -" . substr($makeindexcode, 0, 1);
$postnkf	= "nkf -" . substr($LaTeXcode, 0, 1);

open(JPINDEX, "$prenkf $indexfile|")
 			|| die "Cannot open index file $indexfile.\n";
open(KANAINDEX, "kakasi $kakasiopt < $indexfile|")
			|| die "Cannot exec kakasi.\n";
open(MAKEINDEX, "| $mkidxnkf | $makeindex | $postnkf > $tmpfile")
			|| die "Cannot exec $makeindex\n";


select MAKEINDEX;
# Path#1
while (<JPINDEX>) {
    ($entry, $where) = (/\\indexentry{(.*)}{(\d+)}/);
    ($newent) = (<KANAINDEX> =~ /\\indexentry{(.*)}{(\d+)}/);
    $newent = &purify($newent);
    if ($newent ne $entry) {
        print "\\indexentry{$newent\@$entry}{$where}\n";
    } else {
        print;
    }
}
close(JPINDEX);
close(MAKEINDEX);

open(TMPREAD, "$prenkf $tmpfile|") || die "Cannot open temp file.\n";
open(TMPREAD2, "kakasi $kakasiopt <$tmpfile|") || die "Cannot open t2";
open(OUTPUT, "|$postnkf >$newindexfile")   || die "Cannout output to file\n";
select OUTPUT;
while ($line = <TMPREAD>) {
    if (<TMPREAD2> =~ /\\item\s+((\S).*),/) {
	$word = $1;
	$char = &gyou(&purify("\U$2"));
        if ($char ne $lastchr) {
            print "  \\item 【$char】\n";
            $lastchr = $char;
        }
    }
    $line =~ s/(\\item\s+)([&%\#~_^{}\$\\].*)/$1\\verb,$2,/;
    print $line;
}

close(TMPREAD);
close(OUTPUT);

#unlink $tmpfile;

sub purify {
    # print STDOUT "hoge=$_[0]\t";
    local($str) = (@_); #$_[0] =~
    $str =~ y/がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ/かきくけこさしすせそたちつてとはひふへほはひふへほ/;
    return $str;
}

sub gyou {
    local($str) = (@_);
    $_ = $str;
#    s/[][]^ -/]/記号/;
    s/[^A-Za-zあ-ん0-9]/記号/;
    s/[0-9]/数字/;
    s/[いうえお]/あ/;
    s/[きくけこ]/か/;
    s/[しすせそ]/さ/;
    s/[ちつてと]/た/;
    s/[にぬねの]/な/;
    s/[ひふへほ]/は/;
    s/[みむめも]/ま/;
    s/[ゆよ]/や/;
    s/[りるれろ]/ら/;
    s/[を]/わ/;
    return;
}
