tc
| This has been superseded by oc on Windows. |
A tiny little Python script used for the conversion of filenames into traditional Chinese and, for supported filetypes, the conversion of the contents of files into traditional Chinese at the user’s direction. Apart from Python itself, on Linux, pip needs to be installed separately through `sudo apt install python3-pip`, and then OpenCC through `pip install opencc --break-system-packages`; on Windows, pip is automatically installed with the Python installation, but you may have to use open
By default, the script will rename the designated file or files into traditional Chinese, and for supported filetypes, to wit, .ass, .css, .htm, .html, .md, .shtm, .shtml, .srt, .txt, .vtt, .wiki, .xht, .xhtml, and .xml files, if -c switch is passed to the script, then copies of the files with their contents also converted into traditional Chinese will be saved alongside the originals with a suffix ‘_t’ appended to their filenames.
The reversed conversion can be triggered with the -s switch, where in this case, the suffix appended to the filenames will correspondingly change to ‘_s’.
Due to python’s independent and yet also inscrutable way of dealing with the paths, its processing logic becomes funny and somewhat risky when pure path or paths are given to the script—
- if a path ending with a backslash `\` (or a slash `/` on Linux) is given to the script, then no conversion will be done at all;
- if an asterisk `*` mark is added to a path ending with a backslash `\` (or a slash `/` on Linux) and passed to the script, then all the files under the given path, but not its subfolders, will be renamed; (the asterisk `*` mark could also work as a wildcard but, in that way, the script shall perform in a logic with which we are all familiar;)
- if a path is given without ending with a backslash `\` (or a slash `/` on Linux), then the script shall convert all the names of the files, and all the names of the subfolders under the given path;
- an optional `-d` switch is added specifically for the preceding instance—if used, the script will then rename the terminal directory only, not any of its files or subfolders; and
- an optional `-r` switch is added to rename recursively, but it is vulnerable and only partially effective when no `-d` switch is given, the path is given without ending with a backslash `\` (or a slash `/` on Linux), and the subfolders themselves need no conversion.
These rules above also apply to the conversion of contents for supported filetypes when -c is used. The current state of the script in dealing with paths is clearly not entirely satisfactory, and yet that is what I can achieve for now.
There is also a REG file attached below to add some commonly used conversion commands to the shell, which, obviously, only works on Windows.
A little note is that the asymmetrical selection of conversion tables ‘s2twp’ and ‘t2s’ is on (my personal) purpose, not an error; feel free to change them at your desire.
The python script
import os
import sys
import glob
from opencc import OpenCC
def rn(p, o):
    """Return *p* with only its last path component passed through ``o.convert``.

    Args:
        p: A file or directory path.
        o: Any object exposing ``convert(str) -> str``.

    Returns:
        The directory part of *p* joined with the converted final component.
        The file system is not touched.
    """
    parent = os.path.dirname(p)
    converted_name = o.convert(os.path.basename(p))
    return os.path.join(parent, converted_name)
def dc(f):
    """Guess the text encoding of file *f*.

    The whole file is read as bytes and tried against an ordered list of
    candidate codecs; the first codec that decodes the bytes without error
    is reported.

    Args:
        f: Path of the file to inspect.

    Returns:
        The name of the first candidate codec that decodes the file, or
        ``None`` when no candidate does.
    """
    with open(f, "rb") as r:
        rw = r.read()

    cs = []
    # A UTF-16 byte-order mark is unambiguous, so it is honoured before any
    # guessing. The endian-specific codec names are used, which leaves the
    # BOM character in the decoded text.
    if rw.startswith(b"\xff\xfe"):
        cs.append("utf-16-le")
    elif rw.startswith(b"\xfe\xff"):
        cs.append("utf-16-be")
    cs += ["utf-8", "utf-8-sig"]
    # BOM-less UTF-16 "successfully" decodes almost any even-length byte
    # string, which would shadow the legacy Chinese codecs below. It is
    # therefore only considered when the data contains NUL bytes, which
    # ordinary GB2312/Big5 text does not.
    if b"\x00" in rw:
        cs += ["utf-16-le", "utf-16-be"]
    cs += ["gb2312", "big5", "windows-1252"]

    for e in cs:
        try:
            rw.decode(e)
        except UnicodeDecodeError:
            continue
        return e
    return None
def cc(f, o, s):
    """Write a converted copy of a supported text file next to the original.

    Args:
        f: Path of the source file.
        o: Converter object exposing ``convert(str) -> str``.
        s: Suffix inserted between the base name and the extension of the
            copy (the caller passes ``"_t"`` or ``"_s"``).

    Returns:
        The path of the newly written copy, or ``None`` when the extension
        is not in the supported set or the encoding could not be detected.
        The copy is always written as UTF-8.
    """
    e = {".ass", ".css", ".htm", ".html", ".md", ".shtm", ".shtml", ".srt", ".txt", ".vtt", ".wiki", ".xht", ".xhtml", ".xml"}
    base, ext = os.path.splitext(f)
    # The extension is matched case-insensitively, but the copy keeps the
    # original spelling so that "A.TXT" yields "A_t.TXT" rather than "A_t.txt".
    if ext.lower() not in e:
        return None
    cp = dc(f)
    if not cp:
        return None
    # newline="" switches off newline translation on both sides, so the copy
    # keeps the line endings of the source instead of the platform default.
    with open(f, "r", encoding=cp, newline="") as r:
        t = r.read()
    nt = o.convert(t)
    nf = f"{base}{s}{ext}"
    with open(nf, "w", encoding="utf-8", newline="") as w:
        w.write(nt)
    return nf
def _rename_converted(path, o):
    """Rename *path* to its converted name, skipping no-ops and collisions."""
    target = rn(path, o)
    if target == path:
        return
    # os.rename silently replaces an existing file on POSIX; refuse instead
    # of destroying an unrelated file that already carries the target name.
    if os.path.lexists(target):
        print(f"skipped, target already exists: {path} -> {target}", file=sys.stderr)
        return
    os.rename(path, target)


def _convert_file(path, o, c, s):
    """Optionally write a converted copy of *path*, then rename file and copy."""
    # The copy must be made before the original is renamed, while *path*
    # still exists under its old name.
    copy = cc(path, o, s) if c else None
    _rename_converted(path, o)
    if copy:
        _rename_converted(copy, o)


def pr(p, o, c, s, r, d):
    """Process one path given on the command line.

    Args:
        p: File or directory path.
        o: Converter object exposing ``convert(str) -> str``.
        c: When true, supported text files also get a converted copy.
        s: Suffix for the converted copies (forwarded to ``cc``).
        r: When true and *p* is a directory, descend into all subfolders.
        d: When true, only *p* itself is renamed, and only if it is a
            directory; files and directory contents are left alone.

    Without *d*, a file is converted/renamed directly, and a directory has
    its entries renamed (the directory itself keeps its name). Paths that
    are neither a file nor a directory are ignored.
    """
    if d:
        if os.path.isdir(p):
            _rename_converted(p, o)
        return
    if os.path.isfile(p):
        _convert_file(p, o, c, s)
        return
    if not os.path.isdir(p):
        return

    if r:
        # Walk bottom-up: a directory's contents are handled before the
        # directory is renamed by its parent's iteration, so no path handed
        # out by os.walk goes stale after a rename.
        for rt, ds, fs in os.walk(p, topdown=False):
            for name in fs:
                _convert_file(os.path.join(rt, name), o, c, s)
            for name in ds:
                _rename_converted(os.path.join(rt, name), o)
    else:
        for name in os.listdir(p):
            fp = os.path.join(p, name)
            if os.path.isdir(fp):
                _rename_converted(fp, o)
            elif os.path.isfile(fp):
                _convert_file(fp, o, c, s)
def mn():
    """Parse ``sys.argv`` and process every path argument.

    Switches (any argument starting with ``-`` is treated as a switch):
        -s  convert to simplified Chinese ("t2s", copy suffix "_s") instead
            of the default traditional conversion ("s2twp", copy suffix "_t");
            the asymmetric choice of tables is deliberate according to the
            accompanying notes
        -c  also write converted copies of supported text files
        -r  descend into subfolders
        -d  rename only the given directory itself

    Every other argument is a path or wildcard pattern; with none given the
    current directory is processed.
    """
    simplified = "-s" in sys.argv
    o = OpenCC("t2s" if simplified else "s2twp")
    s = "_s" if simplified else "_t"
    c = "-c" in sys.argv
    r = "-r" in sys.argv
    d = "-d" in sys.argv
    a = [x for x in sys.argv[1:] if not x.startswith("-")]
    if not a:
        a = ["."]
    for x in a:
        # An argument naming an existing path is taken literally, so names
        # containing "[" or "?" keep working. Anything else that contains a
        # wildcard character is expanded here, because the Windows shell
        # does not expand patterns such as "*.txt" itself.
        if not os.path.exists(x) and any(ch in x for ch in "*?["):
            for p in glob.glob(x):
                pr(p, o, c, s, r, d)
        else:
            pr(x, o, c, s, r, d)


if __name__ == "__main__":
    mn()
The wrappers
@echo off
rem Switch the console to code page 65001 (UTF-8); the output of chcp itself is discarded.
chcp 65001 >nul
rem Run the .py file that shares this batch file's drive, path and base name,
rem handing every command-line argument through unchanged.
py -3 "%~dpn0.py" %*
#!/bin/bash
# Launch the Python script that sits next to this wrapper and shares its name.
script_path=$0
# A name without any slash was resolved through PATH; look up where it lives.
if [[ $script_path != */* ]]; then
    script_path=$(command -v -- "$script_path")
fi
# Resolve symlinks so the .py file is looked up beside the real wrapper file.
script_path=$(readlink -f -- "$script_path")
python3 "${script_path}.py" "$@"
The REG file
Windows Registry Editor Version 5.00
; Adds an "OpenCC" entry with four sub-commands under the shell key of the * class.
; Every command runs C:\_c\tc.bat with the selected item as "%1"; adjust that
; path if the wrapper is stored somewhere else.
[HKEY_CLASSES_ROOT\*\shell\OpenCC]
"MUIVerb"="OpenCC"
; NOTE(review): an empty SubCommands value presumably makes Explorer build the
; submenu from the nested "shell" key below - confirm against the Windows docs.
"SubCommands"=""
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell]
; The numeric prefixes of the key names (1..4) presumably fix the menu order.
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\1s2twp]
@="Convert the filename into traditional Chinese"
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\1s2twp\command]
@="\"C:\\_c\\tc.bat\" \"%1\""
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\2t2s]
@="Convert the filename into simplified Chinese"
; NOTE(review): 0x40 looks like ECF_SEPARATORAFTER, i.e. a separator line
; between the rename-only entries and the "-c" entries - confirm.
"CommandFlags"=dword:00000040
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\2t2s\command]
; -s selects the conversion to simplified Chinese.
@="\"C:\\_c\\tc.bat\" \"%1\" -s"
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\3s2twp-c]
@="Convert the filename and make a traditionalised copy if supported"
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\3s2twp-c\command]
; -c additionally writes a converted copy of supported text files.
@="\"C:\\_c\\tc.bat\" \"%1\" -c"
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\4t2s-c]
@="Convert the filename and make a simplified copy if supported"
[HKEY_CLASSES_ROOT\*\shell\OpenCC\shell\4t2s-c\command]
@="\"C:\\_c\\tc.bat\" \"%1\" -c -s"