0%

使用python解析CVS表格绘制燃尽图和收敛图

近期有个任务,是让部门的项目开发变得可视化,以便老板们能快速了解项目情况。

我这边主要是提供思路,从PMS系统中提取项目的任务以及缺陷情况,绘制成两张图——任务燃尽图(TR4)以及缺陷收敛图(TR5)。

当然这个开发任务最终还是公司IT部门承接,我主要是配合。

近期恰好学习了一些python的知识,心里想着python是否也能实现,于是就有了这篇文章。

任务燃尽图

源码

使用方法:

  • task.cvs: 从pms特定项目中导出的任务cvs表格文件
  • task_id: 分析指定任务及其所有子任务
1
./xxx.py <task.cvs> <task_id>

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python3
# _*_ coding:utf-8 _*_

import sys
import csv
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import matplotlib

filename = sys.argv[1]
index = sys.argv[2]

def get_xticks(min_date, max_date, delta_day):
# 创建日期数组
x_ticks = []
curr_date = min_date

# 设置日期间隔
delta = timedelta(days=delta_day)

while curr_date <= max_date:
x_ticks.append(curr_date)
curr_date += delta

return x_ticks

def find_min_max_dates(csv_data, column_index):
dates = []
for row in csv_data:
date_str = row[column_index]
try:
date = datetime.strptime(date_str, '%Y-%m-%d').date() # 将日期字符串解析为 datetime 对象
dates.append(date)
except ValueError:
pass

min_date = min(dates) if dates else None
max_date = max(dates) if dates else None

return min_date, max_date

def count_dates_before_3(csv_data, column_index, min_date, max_date):
date_counts = {}

curr_date = min_date
while curr_date <= max_date:
date_counts[curr_date] = 0
curr_date += timedelta(days=1)

for row in csv_data:
date_str = row[column_index]
try:
date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
if min_date <= date_obj <= max_date:
curr_date = date_obj
while curr_date >= min_date:
date_counts[curr_date] += 1
curr_date -= timedelta(days=1)
except ValueError:
curr_date = max_date
while curr_date >= min_date:
date_counts[curr_date] += 1
curr_date -= timedelta(days=1)
pass

return date_counts

def find_children(csv_data, parent_index, c_col, p_col):
children = []
for row in csv_data:
if row[2] == parent_index:
children.append(row) # 将子任务的索引添加到列表中
children.extend(find_children(csv_data, row[0], c_col, p_col)) # 递归查找子任务的子任务
return children

# 打开原始CSV文件和新的CSV文件
with open(filename, 'r') as csvfile, open(filename+".sort", 'w', newline='') as sorted_csvfile:
# 创建CSV读取器和写入器对象
reader = csv.reader(csvfile)
writer = csv.writer(sorted_csvfile)

# 读取原始CSV文件中的数据
data = list(reader)

writer.writerow(data[0])

# 查找"父任务","计划开始日期","计划完成日期"所在的列索引
p_col = None
s_col = None
e_col = None
l_col = None
for i, cell in enumerate(data[0]):
if cell == "父任务":
p_col = i

if cell == "计划开始日期":
s_col = i

if cell == "计划完成日期":
e_col = i

if cell == "实际完成时间":
l_col = i

# 根据第二列对数据进行排序
sorted_data = sorted(data[1:], key=lambda row: row[0])

for row in sorted_data:
if row[0] == index:
writer.writerow(row)
real_min_date = datetime.strptime(row[s_col], '%Y-%m-%d').date()
#real_min_date = row[s_col]

child_data = find_children(sorted_data, index, 0, p_col)

for child in child_data:
writer.writerow(child)

min_date, max_date = find_min_max_dates(child_data, e_col)

date_now = datetime.now().date()
date_counts_orig = count_dates_before_3(child_data, e_col, real_min_date, max_date)
real_date_counts_orig = count_dates_before_3(child_data, l_col, real_min_date, max_date)

# 截止到今天
date_counts = {k: v for k, v in date_counts_orig.items() if k < date_now}
real_date_counts = {k: v for k, v in real_date_counts_orig.items() if k < date_now}

# 画图
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False

plt.figure(figsize=(15, 6))

x = list(date_counts.keys())
y = list(date_counts.values())

x1 = list(real_date_counts.keys())
y1 = list(real_date_counts.values())

plt.plot(x, y, label='预期剩余')
plt.plot(x1, y1, label='实际剩余')
plt.xlabel('日期')
plt.ylabel('任务个数')

plt.title('任务燃尽图')

x_ticks = get_xticks(min_date, date_now, 15)
num_list = mdates.date2num(x_ticks)
plt.xticks(num_list, [date.strftime('%Y-%m-%d') for date in x_ticks])

y_ticks = np.arange(0, 280, 20)
plt.yticks(y_ticks)

plt.xticks(rotation=90)
plt.legend()
plt.show()

效果

任务燃尽图

缺陷收敛图

源码

使用方法:

  • issues.cvs: 从pms特定项目中导出的任务cvs表格文件
1
./xxx.py <issues.cvs>

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python3
# _*_ coding:utf-8 _*_

import sys
import csv
from datetime import datetime, timedelta
from collections import Counter

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import matplotlib

filename = sys.argv[1]

def get_xticks(min_date, max_date, delta_day):
# 设置日期间隔
delta = timedelta(days=delta_day)

# 生成日期刻度
x_ticks = mdates.drange(min_date, max_date, delta)

return x_ticks

def find_min_max_dates(csv_data, column_index):
dates = []
for row in csv_data:
date_str_tmp = row[column_index]
date_str = date_str_tmp.split(' ')[0]
try:
date = datetime.strptime(date_str, '%Y-%m-%d').date() # 将日期字符串解析为 datetime 对象
dates.append(date)
except ValueError:
pass

min_date = min(dates) if dates else None
max_date = max(dates) if dates else None

return min_date, max_date

def count_columns(cvs_data, index1, index2, min_date, max_date):
column1_data = []
column2_data = []

for row in cvs_data:
createtime_str = row[index1]
index1_str = createtime_str.split(' ')[0]
column1_data.append(index1_str)
if row[index2] != "":
column2_data.append(row[index2])

# 使用 Counter 统计不同元素的个数
column1_counts = Counter(column1_data)
column2_counts = Counter(column2_data)

# 按日期从旧到新进行排序
sorted_column1_counts = dict(sorted(column1_counts.items(), key=lambda x: x[0]))
sorted_column2_counts = dict(sorted(column2_counts.items(), key=lambda x: x[0]))

# 填充不在日期范围内的日期,并将计数设置为0
current_date = min_date

while current_date <= max_date:
date_str = current_date.strftime('%Y-%m-%d')
if date_str not in sorted_column1_counts:
sorted_column1_counts[date_str] = 0
if date_str not in sorted_column2_counts:
sorted_column2_counts[date_str] = 0
current_date += timedelta(days=1)

# 按日期从旧到新进行排序
sorted_c_counts = dict(sorted(sorted_column1_counts.items(), key=lambda x: x[0]))
sorted_l_counts = dict(sorted(sorted_column2_counts.items(), key=lambda x: x[0]))

current_date = min_date
sorted_c_counts_sum = {}
sorted_l_counts_sum = {}
count_c_sum = 0
count_l_sum = 0
while current_date <= max_date:
date_str = current_date.strftime('%Y-%m-%d')
count_c_sum += sorted_c_counts[date_str]
sorted_c_counts_sum[date_str] = count_c_sum
count_l_sum += sorted_l_counts[date_str]
sorted_l_counts_sum[date_str] = count_l_sum
current_date += timedelta(days=1)

return sorted_c_counts, sorted_l_counts, sorted_c_counts_sum, sorted_l_counts_sum

# 打开原始CSV文件和新的CSV文件
with open(filename, 'r') as csvfile:
# 创建CSV读取器和写入器对象
reader = csv.reader(csvfile)

# 读取原始CSV文件中的数据
data = list(reader)

# 查找"#","创建时间","实际完成日期"所在的列索引
i_col = None
c_col = None
l_col = None
for i, cell in enumerate(data[0]):
if cell == "#":
i_col = i

if cell == "创建时间":
c_col = i

if cell == "实际完成时间":
l_col = i

min_date, fake_max_date = find_min_max_dates(data[1:], c_col)
date_now = datetime.now().date()
max_date = date_now

c_counts, l_counts, c_counts_sum, l_counts_sum = count_columns(data[1:], c_col, l_col, min_date, max_date)

plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure(figsize=(15, 6))

x = list(c_counts.keys())
y = list(c_counts.values())

x1 = list(l_counts.keys())
y1 = list(l_counts.values())

x2 = list(c_counts_sum.keys())
y2 = list(c_counts_sum.values())

x3 = list(l_counts_sum.keys())
y3 = list(l_counts_sum.values())

plt.plot(x, y, label='每日新增')
plt.plot(x1, y1, label='每日解决')
plt.plot(x2, y2, label='累计新增')
plt.plot(x3, y3, label='累计解决')
plt.xlabel('日期')
plt.ylabel('缺陷个数')

plt.title('缺陷收敛图')
x_ticks = get_xticks(min_date, date_now, 15)
# 将数值转换为日期对象
x_dates = [mdates.num2date(x) for x in x_ticks]
# 将日期对象格式化为字符串
x_labels = [date.strftime('%Y-%m-%d') for date in x_dates]
# 设置 x 轴刻度为日期字符串
plt.xticks(x_labels, rotation=90)

y_ticks = np.arange(0, 500, 20)
plt.yticks(y_ticks)

plt.legend()
plt.show()


效果

缺陷收敛图

-------------本文结束 感谢阅读-------------
赏☕