You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tasks/bin/simplify.py

47 lines
1.1 KiB
Python

#**********************************************************************************************
#
# simplify.py - switch traditional Chinese text to simplified Chinese
#
# Author: Yue Zhang
#
#**********************************************************************************************
import sys
# Usage text shown when the script is called with the wrong number of
# arguments.  The lines are joined with explicit "\n": backslash-newline
# continuations inside a plain string literal glue the lines together without
# any line break, which made the message unreadable.
g_sHint = (
    "simplify.py\n"
    "Convert text from traditional Chinese to Simplified Chinese.\n"
    "Yue Zhang\n"
    "frcchang@gmail.com\n"
    "\n"
    "Usage - simplify.py traditional.txt >simplify.txt\n"
    "\n"
    "The input file must be encoded in UTF-8.\n"
    "You also need to put the char table utftable.txt in the same directory.\n"
)


def parse_table(lines):
    """Build the character mapping from the lines of the char table.

    Each non-blank line must be exactly three characters once decoded from
    UTF-8: ``X=Y``.  The resulting dict maps ``Y`` (third character) to ``X``
    (first character), exactly as the table file is laid out.

    Args:
        lines: iterable of UTF-8 encoded byte strings, one per table line
            (for instance a file opened in binary mode).

    Returns:
        dict mapping each source character to its replacement.

    Raises:
        ValueError: if a non-blank line is not of the form ``X=Y``.
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    table = {}
    for number, raw in enumerate(lines, 1):
        # Strip the raw bytes first so only ASCII whitespace is removed.
        entry = raw.strip().decode("utf-8")
        if not entry:
            # Tolerate blank lines (typically a trailing one at end of file).
            continue
        # Explicit check instead of `assert`, which vanishes under `python -O`.
        if len(entry) != 3 or entry[1] != "=":
            raise ValueError(
                "char table line %d: expected 'X=Y', got %r" % (number, entry)
            )
        table[entry[2]] = entry[0]
    return table


def convert_lines(lines, table):
    """Yield each input line with its characters replaced through *table*.

    Args:
        lines: iterable of UTF-8 encoded byte strings, one per input line.
        table: dict mapping a character to its replacement; characters
            missing from the table are passed through unchanged.

    Yields:
        The converted line as text, without its trailing newline.

    Raises:
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    for raw in lines:
        text = raw.decode("utf-8")
        # Only drop a newline that is really there: the last line of a file
        # may lack one, and blindly slicing [:-1] would eat a real character.
        if text.endswith("\n"):
            text = text[:-1]
        yield "".join([table.get(char, char) for char in text])


def main(argv):
    """Run the converter; return the process exit status.

    Args:
        argv: full argument vector, program name first.  Exactly one extra
            argument, the path of the UTF-8 input file, is expected.

    Returns:
        1 after printing the usage text when the argument count is wrong,
        0 after a successful conversion.
    """
    if len(argv) != 2:
        print(g_sHint)
        return 1

    # The table is looked up relative to the current working directory.
    with open("utftable.txt", "rb") as table_file:
        table = parse_table(table_file)

    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts byte strings.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    with open(argv[1], "rb") as text_file:
        for converted in convert_lines(text_file, table):
            out.write(converted.encode("utf-8") + b"\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))