【codecs.BOM】使用Python程序输出csv格式数据

这些天做一个数据处理的任务:
要求是输出一个csv格式的文件。
每次用NotePad++打开都正常,一用excel打开就一万个乱码,
好不容易乱码没了,用作分隔符的逗号也进单元格去了(变成单列了)

辛苦地查原因、调试代码、测试输出的过程略去(以后要是有心情再来写好了)……
重点是!~
把能用的模板记录一下,以后说不定还要用呢QwQ

[Convert2Csv.py]
``` python
# -*- coding: utf-8 -*-

import codecs
from multiprocessing import Pool

import os
import re
import sys
import csv
import urllib2
import commands
import ConfigParser

if __name__ == '__main__':
    # Python 2 script: convert the tab-separated ./outSort into Features.csv.
    # Every input record becomes three CSV rows:
    #   row 1: '0', field 0, field 1, field 2
    #   row 2: three empty cells, then field 3 (when non-empty)
    #   row 3: three empty cells, then field 4 (when non-empty)
    infile = "./outSort"
    # Python 2's csv module expects the output file in binary mode.
    with open('Features.csv', 'wb+') as csvfile:
        # The UTF-8 BOM must be the very first bytes of the file; it is what
        # lets Excel open the UTF-8 text without garbling it.
        csvfile.write(codecs.BOM_UTF8)
        spamwriter = csv.writer(csvfile, dialect='excel')
        spamwriter.writerow(['Sign', 'Count', 'Feature', 'Info'])
        # Stream the input line by line and close it when done.
        with open(infile) as source:
            for each in source:
                # Drop the line terminator so it cannot leak into the last
                # cell or make an empty trailing field look non-empty.
                each = each.rstrip('\r\n')
                if not each:
                    # Blank line: nothing to emit. (str.split never returns
                    # an empty list, so testing `segs` cannot detect this.)
                    continue
                segs = each.split('\t')
                # Pad short records to five fields instead of raising
                # IndexError on the accesses below.
                segs.extend([''] * (5 - len(segs)))
                row = ['0', segs[0], segs[1], segs[2]]
                sublist1 = ['', '', '']
                sublist2 = ['', '', '']
                if segs[3]:
                    sublist1.append(segs[3])
                if segs[4]:
                    sublist2.append(segs[4])
                spamwriter.writerow(row)
                spamwriter.writerow(sublist1)
                spamwriter.writerow(sublist2)
```


输入:UTF-8编码文件
输出:CSV格式数据,使用Excel或NotePad++打开,中文均不乱码

要点:
``` python
with open('Features.csv', 'wb+') as csvfile:
csvfile.write(codecs.BOM_UTF8)
spamwriter = csv.writer(csvfile,dialect='excel')

```

CSV写入函数解释:

1
2
spamwriter.writerow(list) 
# 传入一个list即可,写一行,每个元素为当前行的一列,若需要空行,请务必增加一个空字符串元素

【Updated 2016/03/18】
现在难度大了点,要合并单元格,于是学着用了一下xlwt和xlrd:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-

import codecs
from multiprocessing import Pool

import os
import re
import sys
import csv
import xlrd
import xlwt
import urllib2
import commands
import ConfigParser

def MakeCsv():
    """Convert the tab-separated ./outSort into Features.csv (Python 2).

    The output starts with a UTF-8 BOM followed by a header row. Every
    input record then becomes three CSV rows:

      * '0', field 0, field 1, field 2
      * three empty cells, then field 3 (when non-empty)
      * three empty cells, then field 4 (when non-empty)

    Blank input lines are skipped and records with fewer than five
    fields are padded with empty strings.
    """
    infile = "./outSort"
    # Python 2's csv module expects the output file in binary mode.
    with open('Features.csv', 'wb+') as csvfile:
        # The BOM must be the very first bytes written to the file.
        csvfile.write(codecs.BOM_UTF8)
        spamwriter = csv.writer(csvfile, dialect='excel')
        spamwriter.writerow(['Sign', 'Count', 'Feature', 'Info'])
        # Stream the input line by line and close it when done.
        with open(infile) as source:
            for each in source:
                # Drop the line terminator so it cannot leak into the last
                # cell or make an empty trailing field look non-empty.
                each = each.rstrip('\r\n')
                if not each:
                    # str.split never returns an empty list, so blank lines
                    # have to be detected before splitting.
                    continue
                segs = each.split('\t')
                # Pad short records instead of raising IndexError below.
                segs.extend([''] * (5 - len(segs)))
                row = ['0', segs[0], segs[1], segs[2]]
                sublist1 = ['', '', '']
                sublist2 = ['', '', '']
                if segs[3]:
                    sublist1.append(segs[3])
                if segs[4]:
                    sublist2.append(segs[4])
                spamwriter.writerow(row)
                spamwriter.writerow(sublist1)
                spamwriter.writerow(sublist2)

def MakeInput():
    """Sort ``outData`` into ``outSort`` using the external ``sort`` command.

    Runs ``sort -k1 -n -r outData > outSort`` through the shell (Python 2
    ``commands`` module). A non-zero exit status is reported on stderr
    rather than being silently dropped; no exception is raised, so callers
    continue exactly as before.
    """
    (status, output) = commands.getstatusoutput('sort -k1 -n -r outData > outSort')
    if status != 0:
        # stdout is redirected into outSort, so `output` holds only what the
        # command printed on stderr.
        sys.stderr.write('MakeInput: sort exited with status %s: %s\n'
                         % (status, output))

def Combine():
    """Build ./Demo.xls from ./outSort with vertically merged cells (Python 2).

    The sheet 'Feature' gets a header row, then three sheet rows per input
    record: columns 0-2 ('0', field 0, field 1) are each merged across the
    three rows, while column 3 holds field 2, field 3 and field 4 on the
    first, second and third row (fields 3 and 4 only when non-empty).

    Blank input lines are skipped and records with fewer than five fields
    are padded with empty strings.
    """
    f = xlwt.Workbook()
    sheet = f.add_sheet(u'Feature', cell_overwrite_ok=True)

    # Header row.
    for col, title in enumerate(['Sign', 'Count', 'Feature', 'Info']):
        sheet.write(0, col, title)

    # Data rows start right below the header; each record occupies three rows.
    cur_row = 1
    infile = "./outSort"
    with open(infile) as source:
        for raw in source:
            # Decode to unicode for xlwt and drop the line terminator so it
            # cannot leak into the last cell.
            each = raw.decode('utf-8').rstrip(u'\r\n')
            if not each:
                # str.split never returns an empty list, so blank lines have
                # to be detected before splitting.
                continue
            segs = each.split('\t')
            # Pad short records instead of raising IndexError below.
            segs.extend([u''] * (5 - len(segs)))
            sheet.write_merge(cur_row, cur_row + 2, 0, 0, '0')
            sheet.write_merge(cur_row, cur_row + 2, 1, 1, segs[0])
            sheet.write_merge(cur_row, cur_row + 2, 2, 2, segs[1])
            sheet.write(cur_row, 3, segs[2])
            if segs[3]:
                sheet.write(cur_row + 1, 3, segs[3])
            if segs[4]:
                sheet.write(cur_row + 2, 3, segs[4])
            cur_row += 3

    f.save('./Demo.xls')

if __name__ == "__main__":
    # Pipeline: sort the raw data first, then build the merged-cell workbook.
    # MakeCsv() is the alternative plain-CSV output and is not run here.
    MakeInput()
    Combine()