#!/vol/perl/bin/perl -w
# sampa to praat ipa font converter
# By Thorsten Trippel
# University of Bielefeld
# ttrippel@spectrum.uni-bielefeld.de
# May 2001
#
# This program requires perl 5.6 or higher, 
# tested on perl 5.6.0 build for sun4-solaris
#
#
# This program is a pure font converter: it reads SAMPA-IPA symbols from
# the input file and produces the corresponding PRAAT-IPA notation
# (ASCII character combinations), which is sent to STDOUT.
#
# Usage: sampa2praat.pl INPUTFILE 
#


# A preprocessing rule replaces X-SAMPA J\ by the placeholder
# "jotbackslash", because its substrings would otherwise match several rules.
# No other way of expressing this with regular expressions was found, due to
# the lack of a suitable delimiting character (i.e. whitespace, etc.).

# For historic reasons the keys of the hash are PRAAT representations.
# This does not have any consequences as long as the IPA characters exist
# in both notations. However, the PRAAT manual does not specify the following
# characters (which exist in the IPA chart with symbols in SAMPA
# notation):
# O\ |\ !\ =\ |\|\ p_> t_> k_> s_> x\ @\ 3\

# Beginning of translation table

%praat2sampa= (
"\\c\,"   =>   "C", 
"\\l\-"   =>   "K(?!\\\\)",
"\\lz"   =>   "K\\\\",
"\\lc"   =>   "L\\\\",
"\\9\-"   =>   "<\\\\",
"\\9e"   =>   "\\?\\\\",
"\\l\."   =>   "l`",
"\\ab"   =>   "Q",
"\\ae"   =>   "\\{",
"\\as"   =>   "A",
"\\at"   =>   "6",
"\\b\^"   =>   "b_<",
"\\bc"   =>   "B\\\\",
"\\be"   =>   "B",
"\\cc"   =>   "s\\\\",
"\\ct"   =>   "O",
"\\d\."   =>   "d`",
"\\d\^"   =>   "d_<",
"\\dh"   =>   "D",
"\\ep"   =>   "E",
"\\er"   =>   "3",
"\\f\."   =>   "r`",
"\\f2"   =>   "p\\\\",
"\\fh"   =>   "4",
"\\g\^"   =>   "g_<",
"\\gc"   =>   "G\\\\",
"\\h\-"   =>   "X\\\\",
"\\h\^"   =>   "(?<!f)h\\\\(?!h)",
"\\hc"   =>   "H\\\\",
"\\hs"   =>   "U",
"\\ht"   =>   "H",
"\\i\-"   =>   "1",
"\\ic"   =>   "I",
"\\j\^"   =>   "J\\\\_<",
"\\jc"   =>   "j\\\\",
"\\?-"   =>   ">\\\\",
"\\l\~"   =>   "5",
"\\mj"   =>   "F",
"\\ml"   =>   "M\\\\",
"\\mt"   =>   "M",
"\\n\."   =>   "n`",
"\\nc"   =>   "N\\\\",
"\\ng"   =>   "N",
"\\nj"   =>   "J",
"\\o\-"   =>   "8",
"\\oe"   =>   "(?<!\\\\)9(?!\\\\|e)",
"\\r\."   =>   "r\\\\`",
"\\rc"   =>   "R\\\\",
"\\rh"   =>   "7",
"\\ri"   =>   "R",
"\\rl"   =>   "(?<!m)l\\\\",
"\\s\."   =>   "s`",
"\\sh"   =>   "S",
"\\sr"   =>   "`.@`.",
"\\te"   =>   "(?<!\\\\)T",
"\\u\-"   =>   "(?<!_)}",
"\\vs"   =>   "P",
"\\vt"   =>   "V",
"\\wt"   =>   "W",
"\\yc"   =>   "Y",
"\\yt"   =>   "L(?!\\\\)",
"\\z\."   =>   "z`",
"\\zc"   =>   "z\\\\",
"\\zh"   =>   "Z",
"a"   =>   "a",
"b"   =>   "b",
"c"   =>   "c",
"d"   =>   "d",
"d\\cn"   =>   "_}\\(t_}\\)",
"e"   =>   "e",
"f"   =>   "f",
"h"   =>   "h",
"i"   =>   "i",
"j"   =>   "j",
"k"   =>   "k",
"l"   =>   "l",
"m"   =>   "m",
"n"   =>   "n",
"notspecified"   =>   "!\\\\",
"notspecified"   =>   "3\\\\",
"notspecified"   =>   "=\\\\",
"notspecified"   =>   "@\\\\",
"notspecified"   =>   "O\\\\",
"notspecified"   =>   "k_>",
"notspecified"   =>   "p_>",
"notspecified"   =>   "s_>",
"notspecified"   =>   "t_>",
"notspecified"   =>   "x\\\\",
"notspecified"   =>   "\\|\\\\",
"o"   =>   "o",
"o\\T\^"   =>   "_r\\(e_r\\)",
"o\\Tv"    =>   "_oe_o",
"p"   =>   "p",
"q"   =>   "q",
"r"   =>   "r",
"s"   =>   "s",
"t"   =>   "t",
"u"   =>   "u",
"v"   =>   "v",
"w"   =>   "w",
"x"   =>   "x",
"y"   =>   "y",
"z"   =>   "z",
"n\\|v"   =>   "\_=",
"\\G\^"   =>   "G\\_\\<",
"\\?g"   =>   "\\?(?!\-|\\ )",
"\\ci"   =>   "X(?!\\\\)",
"\\ga"   =>   "G(?!\\\\|^|_)",
"\\gs"   =>   "(?<!\\\\|n)g(?!^|_)",
"\\j\-"   =>   "(?<!\\\\)jotbackslash",
"\\rt"   =>   "r\\\\(?!`)",
"\\sw"   =>   "@(?!`)",
"\\t\."   =>   "t`",
"\\o/"   =>   "(?<!f)2",
"k\\lip"   =>   "_(?!`|^|<|r|})",
);
 


# Processing loop 
# This is the major function reading the input and replacing a string from 
# the translation table one at a time

while (<>){
$var=$_;
$var=~s#J\\#jotbackslash#;
while (($praat,$sampa)= each(%praat2sampa))
	{
	$var=~s#$sampa#$praat#;
	 };
	print "$var";
};

# End of function
# EOF
