mirror of https://github.com/tasks/tasks
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
47 lines
1.1 KiB
Python
47 lines
1.1 KiB
Python
#**********************************************************************************************
|
|
#
|
|
# simplify.py - switch traditional Chinese text to simplified Chinese
|
|
#
|
|
# Author: Yue Zhang
|
|
#
|
|
#**********************************************************************************************
|
|
|
|
import sys
|
|
|
|
g_sHint = "simplify.py \
|
|
Convert text from traditional Chinese to Simplified Chinese. \
|
|
Yue Zhang \
|
|
frcchang@gmail.com\
|
|
\
|
|
Usage - simplify.py traditional.txt >simplify.txt\
|
|
\
|
|
The input file must be encoded in UTF-8. \
|
|
You also need to put the char table utftable.txt in the same directory. \
|
|
\
|
|
"
|
|
|
|
if len(sys.argv) != 2:
|
|
print g_sHint
|
|
sys.exit(1)
|
|
|
|
table = {}
|
|
file = open("utftable.txt")
|
|
line = file.readline()
|
|
while line:
|
|
line = line.strip()
|
|
line = unicode(line, "utf-8")
|
|
assert len(line)==3 and line[1]=="="
|
|
table[line[2]] = line[0]
|
|
line = file.readline()
|
|
file.close()
|
|
|
|
file = open(sys.argv[1])
|
|
line = file.readline()
|
|
while line:
|
|
line = line[:-1]
|
|
line = unicode(line, "utf-8")
|
|
output = [table.get(char, char) for char in line]
|
|
print "".join(output).encode("utf-8")
|
|
line = file.readline()
|
|
file.close()
|