#!/bin/sh
#
# Print an upper-cased, de-duplicated list of single words taken from the
# left-hand side of the TU Chemnitz "ding" de-en dictionary file, one word
# per line, on stdout.
#
# The download is streamed straight into the filters. The exit status of the
# pipeline is that of the final `sort`, so a failed download shows up as a
# curl message on stderr (-S) and (nearly) empty output, not as a failure code.
#
# Two sed stages are used on purpose: after the first stage splits an entry
# into alternatives, that sed still holds all of them in ONE pattern space,
# where `.` and `$` do not stop at the embedded newlines. Piping into a second
# sed makes every alternative a real input line, so the per-word clean-up acts
# on each alternative individually instead of on the whole entry.
#
# NOTE(review): tr implementations differ in multibyte support; verify that the
# umlaut mapping below behaves as intended on the target platform and locale.

curl -f -sS http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/de-en.txt.gz |
	gzip -d |            # uncompress
	sed -E 's#::.*##;    # delete translation
	/^#.*/d;             # delete comments
	s/ ?[;|] ?/\
/g;                          # split alternative forms into separate lines
	' |                  # from here on every alternative is its own line
	sed -E 's# ?[([{].*##;  # delete annotations and metadata
	/[])}]/d;            # delete tail fragments of annotations cut by the split
	/ [^ ]/d;            # delete composite phrases
	/\.\.\.$/d;          # delete word-parts
	s# $##;              # delete trailing white space
	' |
	tr 'a-zöäü' 'A-ZÖÄÜ' |
	sort -u