#!/usr/bin/python
'''Newline converter

- Converts \\r and \\r\\n line endings into \\n line endings*
- Removes a leading byte order mark (BOM)

* Tested on UNIX and Mac OS X (not tested on Windows)

The input is read and written as raw bytes, so the conversion is applied
to the byte values 0x0D / 0x0A without decoding the text.

# License: GNU General Public License, see http://www.clips.ua.ac.be/~vincent/scripts/LICENSE.txt
'''
__author__ = "Kim Luyckx, Vincent Van Asch"
__date__ = "July 2011"
__version__ = "1.0"

import getopt
import os
import sys
from codecs import BOM_BE, BOM_LE, BOM_UTF8

# Byte order marks recognised at the very start of the data.
_BOMS = (BOM_UTF8, BOM_BE, BOM_LE)

# Help text; the three placeholders are version, author and date.
_USAGE_TEXT = '''
Convert line endings (version %s)

USAGE:
    python ./reline.py [-b] [-n] inputfile outputfile

Converts \\r and \\r\\n line endings into \\n line endings and removes
the byte order mark (BOM) at the start of the file.

OPTIONS
    -b : If present: Preserve byte order marks for UTF8, UTF16 (LE and BE) on the first line.
    -n: Leave the newlines as they are

NOTE
    - Setting -b and -n simultaneously means doing nothing.
    - Tested UNIX ans Mac OS X; not tested on Windows.

%s, %s
'''


def _usage():
    """Write the command-line help text to standard error."""
    sys.stderr.write(_USAGE_TEXT % (__version__, __author__, __date__) + "\n")


def strip_bom(data):
    """Return *data* (bytes) without a leading UTF-8 or UTF-16 BOM.

    Only an exact BOM prefix is removed, and at most one.  ``lstrip`` must
    not be used here: it treats its argument as a set of bytes and would
    also eat content bytes that merely happen to occur in a BOM.
    """
    for bom in _BOMS:
        if data.startswith(bom):
            return data[len(bom):]
    return data


def normalize_newlines(data):
    """Return *data* (bytes) with every \\r\\n and lone \\r replaced by \\n."""
    # \r\n must be handled first so it collapses to one \n rather than two.
    return data.replace(b"\r\n", b"\n").replace(b"\r", b"\n")


def convert(data, remove_byteorder=True, convert_newlines=True):
    """Apply the selected conversions to *data* and return the result.

    data              -- file contents as bytes (may be empty)
    remove_byteorder  -- strip a leading BOM when true
    convert_newlines  -- rewrite \\r and \\r\\n as \\n when true
    """
    if convert_newlines:
        data = normalize_newlines(data)
    if remove_byteorder:
        data = strip_bom(data)
    return data


def main(argv=None):
    """Run the converter and return the process exit status.

    argv -- argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns 0 on success or after printing help, 1 when the number of
    positional arguments is not two, and 2 on an unknown option.
    Errors from opening, reading or writing the files propagate.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(argv, 'hbn', ['help'])
    except getopt.GetoptError:
        # Unknown option: print help information and signal misuse.
        _usage()
        return 2

    remove_byteorder = True
    convert_newlines = True
    for opt, _value in opts:
        if opt in ('-h', '--help'):
            _usage()
            return 0
        if opt == '-b':
            remove_byteorder = False
        if opt == '-n':
            convert_newlines = False

    if len(args) != 2:
        _usage()
        return 1

    infile = os.path.expanduser(args[0])
    outfile = os.path.expanduser(args[1])

    # The input is read completely before the output is opened, so both
    # arguments may name the same file.
    with open(infile, "rb") as src:
        data = src.read()

    data = convert(data, remove_byteorder, convert_newlines)

    # Binary mode keeps the platform from re-expanding \n on output.
    with open(outfile, "wb") as dst:
        dst.write(data)
    return 0


if __name__ == "__main__":
    sys.exit(main())