• Non ci sono risultati.

The PreProcess class serves several purposes:

• Conversion, which converts the images from DICOM format to NIfTI format.

• Best View, which finds the most informative slice of an image in each viewing direction.

• Visualization, which renders the data as JPG or PNG pictures.

• Image Info, which extracts the volume and size of the images.

1

" " "

2

l i b r a r i e s : data pre - p r o c e s s i n g

3

" " "

4

i m p o r t os

5

i m p o r t d i c o m 2 n i f t i

6

i m p o r t p a n d a s as pd

7

i m p o r t m a t p l o t l i b . p y p l o t as plt

8

i m p o r t n i b a b e l as nib

9

i m p o r t n u m p y as np

10

11

" " " t r a i n _ d f : p a t i e n t s l a b l e s

12

p a t i e n t I D : list of patiend ’ s ID in i n p u t

f o l d e r

13

t r a i n _ p a t h : D I C O M f o r m a t path

14

t r a i n _ p a t h _ n i f t i : N I F T I f o r m a t path - - >

o u t p u t for c o n v e r s i o n f u n c t i o n

15

v i s u a l i z a t i o n _ p a t h : JPG f o r m a t path - - >

o u t p u t for v i s u a l i z a t i o n f u n c t i o n

16

s e q u e n c e _ t y p e s : s e q u e n c e type of mMRI p i c t u r e s as a list of s t r i n g

17

" " "

18 19

20

class PreProcess:
    """Pre-processing helpers for a multi-parametric MRI dataset.

    Args:
        train_df: patient labels, either a DataFrame or the path of a CSV
            file. The table must contain the columns ``BraTS21ID`` and
            ``MGMT_value`` (both are read by ``visualization``).
        patientID: list of patient IDs, or the path of a folder whose entry
            names are the patient IDs.
        train_path: path of the raw dataset in DICOM format.
        train_path_nifti: path of the dataset converted to NIfTI format
            (read by ``image_info``).
        visualization_path: path where the rendered JPG images are stored.
        sequence_types: sequence types of the mMRI pictures as a list of
            strings; ``None`` or an empty list selects
            ``DEFAULT_SEQUENCE_TYPES``.
    """

    # Sequence types used when the caller does not supply any.
    DEFAULT_SEQUENCE_TYPES = ("FLAIR", "T1w", "T1wCE", "T2w")

    def __init__(
        self,
        train_df,
        patientID,
        train_path,
        train_path_nifti,
        visualization_path,
        sequence_types,
    ):
        # The arguments are honoured instead of being replaced by hard-coded
        # placeholder paths; a path is still accepted where the original code
        # expected one.
        if isinstance(train_df, (str, os.PathLike)):
            self.train_df = pd.read_csv(train_df)
        else:
            self.train_df = train_df

        if isinstance(patientID, (str, os.PathLike)):
            # Sorted so that the processing order is reproducible.
            self.patientID = sorted(os.listdir(patientID))
        else:
            self.patientID = list(patientID)

        self.train_path = train_path
        self.train_path_nifti = train_path_nifti
        self.visualization_path = visualization_path
        self.sequence_types = list(sequence_types or self.DEFAULT_SEQUENCE_TYPES)

    @staticmethod
    def _report_failure(action, patient, types, error):
        """Log a best-effort failure instead of silently swallowing it."""
        # Imported locally because the module header does not import logging.
        import logging

        logging.getLogger(__name__).warning(
            "%s failed for patient %s, sequence %s: %s",
            action,
            patient,
            types,
            error,
        )

    def _mgmt_value(self, patient):
        """Return the MGMT_value of ``patient`` from the label table as an int."""
        matches = self.train_df.loc[
            self.train_df["BraTS21ID"] == int(patient), "MGMT_value"
        ]
        # .iloc[0] raises IndexError for an unknown patient; the caller treats
        # that like any other per-image failure.
        return int(matches.iloc[0])

    def conversion(self, original_path, converted_path):
        """Convert every DICOM series to NIfTI.

        Iterates over the patient IDs and, for each one, over the sequence
        types. The series found in ``original_path/<patient>/<type>`` is
        written to ``converted_path/<patient>/<type>/<patient><type>.nii``.
        A series that cannot be converted is logged and skipped; the other
        sequences of the same patient are still processed.
        """
        for patient in self.patientID:
            for types in self.sequence_types:
                source_dir = os.path.join(original_path, patient, types)
                target_dir = os.path.join(converted_path, patient, types)
                # Explicit ".nii" so the file name matches the one that
                # visualization() and image_info() load.
                target_file = os.path.join(target_dir, patient + types + ".nii")
                try:
                    os.makedirs(target_dir, exist_ok=True)
                    dicom2nifti.dicom_series_to_nifti(source_dir, target_file)
                except Exception as error:
                    self._report_failure("conversion", patient, types, error)

    def visualization(self, nifti_path, jpg_path):
        """Render the three middle slices of every NIfTI image as a JPG.

        Iterates over the patient IDs and the sequence types, loads
        ``nifti_path/<patient>/<type>/<patient><type>.nii`` and saves a figure
        with the sagittal, coronal and axial middle slices to
        ``jpg_path/<patient>/<type>/<patient><type>.jpg``. The figure title
        contains the patient ID, the MGMT_value and the sequence type.
        An image that cannot be rendered is logged and skipped.
        """
        view_names = ("Sagittal", "Coronal", "Axial")

        for patient in self.patientID:
            for types in self.sequence_types:
                fig = None
                try:
                    img = nib.load(
                        os.path.join(
                            nifti_path, patient, types, patient + types + ".nii"
                        )
                    )
                    img_data = img.get_fdata()

                    # Middle slice of the volume along each of the three axes.
                    slices = (
                        img_data[img.shape[0] // 2, :, :],
                        img_data[:, img.shape[1] // 2, :],
                        img_data[:, :, img.shape[2] // 2],
                    )

                    fig, axes = plt.subplots(1, 3)
                    fig.set_figheight(17)
                    fig.set_figwidth(40)
                    for axis, view, name in zip(axes, slices, view_names):
                        axis.imshow(view.T, cmap="gray", origin="lower", aspect="auto")
                        axis.set_title(name, fontsize=30, fontweight="bold")

                    plt.suptitle(
                        "Axial, Sagittal and Coronal view of the patient's brain \n "
                        f"Patient ID: {patient} "
                        f"MGMT_value: {self._mgmt_value(patient)} "
                        f"Type: {types}",
                        fontsize=38,
                        fontweight="bold",
                    )

                    target_dir = os.path.join(jpg_path, patient, types)
                    os.makedirs(target_dir, exist_ok=True)
                    # Saving the JPG together with the information in the title.
                    fig.savefig(
                        os.path.join(target_dir, patient + types + ".jpg"), dpi=300
                    )
                except Exception as error:
                    self._report_failure("visualization", patient, types, error)
                finally:
                    # Figures stay registered in pyplot until they are closed;
                    # without this every processed image keeps its figure alive.
                    if fig is not None:
                        plt.close(fig)

    def best_view(self, img):
        """Find the slice with the most non-zero cells in each direction.

        Args:
            img: NIfTI image (an object providing ``get_fdata()``).

        Returns:
            ``[position_sag, position_cor, position_axi]``: for axis 0, 1 and 2
            respectively, the index of the slice containing the largest number
            of non-zero cells (the first such index when there is a tie).
        """
        img_data = img.get_fdata()

        # Counting along the two remaining axes gives one count per slice.
        count_sag = np.count_nonzero(img_data, axis=(1, 2))
        count_cor = np.count_nonzero(img_data, axis=(0, 2))
        count_axi = np.count_nonzero(img_data, axis=(0, 1))

        return [
            int(np.argmax(count_sag)),
            int(np.argmax(count_cor)),
            int(np.argmax(count_axi)),
        ]

    def image_info(self):
        """Collect voxel and image size information and store it in a CSV file.

        For every patient and sequence type the image
        ``train_path_nifti/<patient>/<type>/<patient><type>.nii`` is loaded and
        one row is recorded with the voxel size (header zooms), the voxel
        volume (product of ``pixdim[1:4]``), the image volume (voxel volume
        times the number of non-zero voxels) and the image size (shape times
        voxel size along the first three axes). Images that cannot be read are
        logged and skipped.

        The table is written to ``imageInfo_resampled_task2_data.csv`` in the
        current working directory and also returned.
        """
        columns = ["image", "voxel volume", "voxel size", "image volume", "image size"]

        # The original loop read ``self.sqtypes_task2``, which the class never
        # defines; it is honoured when a caller has set it and otherwise the
        # regular sequence types are used.
        sequence_types = getattr(self, "sqtypes_task2", self.sequence_types)

        rows = []
        for patient in self.patientID:
            for types in sequence_types:
                try:
                    img = nib.load(
                        os.path.join(
                            self.train_path_nifti,
                            patient,
                            types,
                            patient + types + ".nii",
                        )
                    )

                    voxel_size = list(img.header.get_zooms())
                    voxel_volume = np.prod(img.header["pixdim"][1:4])

                    image_shape = list(img.shape)
                    # get_fdata() replaces get_data(), which newer nibabel
                    # releases no longer provide.
                    voxel_count = np.count_nonzero(img.get_fdata())
                    image_volume = voxel_volume * voxel_count
                    image_size = (
                        image_shape[0] * voxel_size[0],
                        image_shape[1] * voxel_size[1],
                        image_shape[2] * voxel_size[2],
                    )
                except Exception as error:
                    self._report_failure("image_info", patient, types, error)
                    continue

                rows.append(
                    {
                        "image": patient + types,
                        "voxel volume": voxel_volume,
                        "voxel size": voxel_size,
                        "image volume": image_volume,
                        "image size": image_size,
                    }
                )

        # Rows are collected in a list because DataFrame.append is not
        # available in current pandas releases.
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv("imageInfo_resampled_task2_data.csv", index=False)
        return df

libraries_pre.py

The Pyradiomics class separates the different areas of the segmented images and generates a CSV file that lists, for every image of the dataset, the paths of the image and of its segmentation masks. Its primary function is the feature extractor, which computes all the radiomics features and stores them in a CSV file:

1

" " "

2

l i b r a r i e s : P y r a d i o m i c s

3

" " "

4

i m p o r t os

5

i m p o r t c o l l e c t i o n s

6

i m p o r t csv

7

i m p o r t l o g g i n g

8

i m p o r t S i m p l e I T K as sitk

9

i m p o r t r a d i o m i c s

10

from r a d i o m i c s i m p o r t f e a t u r e e x t r a c t o r

11

i m p o r t glob

12

i m p o r t n u m p y as np

13

14

" " " p a t h _ r e s u l t : s t o r i n g the r e s u l t in a s p e c i f i c d i r e c t o r y

15

s e g _ p a t h : d i f f e r e n t s e g m e n t e d i m a g e s

16

p a t i e n t I D : list of patiend ’ s ID - Task 1/2

17

t r a i n _ t a s k 1 : N I F T I i m a g e s for task 1 d a t a s e t

18

s e q u e n c e _ t y p e s : s e q u e n c e type of mMRI p i c t u r e s

19

" " "

20 21

22

class Pyradiomics:
    """Prepare segmentation masks and extract radiomics features.

    Args:
        path_result: directory in which the results are stored.
        seg_path: directory of the segmented images.
        train_task1: directory of the NIfTI images of the task 1 dataset; it
            contains one sub-folder per patient.
        sequence_types: sequence types of the mMRI pictures; ``None`` or an
            empty list selects ``DEFAULT_SEQUENCE_TYPES``.
        patientID: list of patient IDs, or the path of a folder whose entry
            names are the patient IDs.
    """

    # Sequence types used when the caller does not supply any.
    DEFAULT_SEQUENCE_TYPES = ("flair", "t1", "t1ce", "t2")

    def __init__(self, path_result, seg_path, train_task1, sequence_types, patientID):
        # The arguments are honoured instead of being replaced by hard-coded
        # placeholder paths.
        self.path_result = path_result
        self.seg_path = seg_path
        self.train_task1 = train_task1
        self.sequence_types = list(sequence_types or self.DEFAULT_SEQUENCE_TYPES)
        if isinstance(patientID, (str, os.PathLike)):
            # Sorted so that the processing order is reproducible.
            self.patientID = sorted(os.listdir(patientID))
        else:
            self.patientID = list(patientID)

    def generate_csv(self):
        """Append image/mask path pairs to ``radiomics_features_task1.csv``.

        The file lives in ``path_result`` and has two columns, ``Image`` and
        ``Mask``. For every patient and sequence type four rows are added that
        pair the sequence image with the ``_seg``, ``_seg_ED``, ``_seg_ET`` and
        ``_seg_NCR`` masks of that patient. The header row is written only when
        the file is new or empty, so repeated runs do not insert header rows
        in the middle of the data.
        """
        csv_path = os.path.join(self.path_result, "radiomics_features_task1.csv")
        needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
        mask_suffixes = ("_seg", "_seg_ED", "_seg_ET", "_seg_NCR")

        with open(csv_path, "a", newline="") as csvfile:
            writer = csv.writer(csvfile)
            if needs_header:
                writer.writerow(["Image", "Mask"])

            # Filling each row with the path of an image and of a mask.
            for patient in self.patientID:
                patient_dir = os.path.join(self.train_task1, patient)
                for types in self.sequence_types:
                    img = os.path.join(
                        patient_dir, patient + "_" + types + ".nii.gz"
                    )
                    for suffix in mask_suffixes:
                        mask = os.path.join(patient_dir, patient + suffix + ".nii.gz")
                        writer.writerow([img, mask])

    @staticmethod
    def _split_labels(img_data):
        """Build the binary masks of the tumour sub-regions.

        Args:
            img_data: array of segmentation labels (0, 1, 2 and 4).

        Returns:
            dict mapping the region name to an array with the shape and dtype
            of ``img_data`` that is 1 inside the region and 0 elsewhere:
            ``whole`` (every non-zero label), ``NCR`` (label 1), ``ED``
            (label 2), ``ET`` (label 4) and ``COR`` (labels 1 and 4).
        """
        img_data = np.asarray(img_data)

        def as_mask(selection):
            return selection.astype(img_data.dtype)

        return {
            "whole": as_mask(img_data != 0),
            "NCR": as_mask(img_data == 1),
            "ED": as_mask(img_data == 2),
            "ET": as_mask(img_data == 4),
            # The previous bitwise-AND formulation mapped label 4 to 0
            # (4 & 1 == 0), which silently dropped the enhancing tumour from
            # the core mask.
            "COR": as_mask(np.isin(img_data, (1, 4))),
        }

    def separate_seg(self):
        """Separate the segmented images of task 1 into binary region masks.

        For every patient ``<patient>_seg.nii.gz`` is read and five masks are
        written next to it, each relabelled to 1 for feature extraction:

        1. ``_seg_whole``: the whole tumour (labels 1 + 2 + 4)
        2. ``_seg_NCR``: necrotic parts of the tumour (label 1)
        3. ``_seg_ED``: peritumoral edematous/invaded tissue (label 2)
        4. ``_seg_ET``: enhancing tumour (label 4)
        5. ``_seg_COR``: tumour core (labels 1 + 4)
        """
        for patient in self.patientID:
            patient_dir = os.path.join(self.train_task1, patient)

            # Reading the original segmentation and its voxel data.
            img = sitk.ReadImage(os.path.join(patient_dir, patient + "_seg.nii.gz"))
            img_data = sitk.GetArrayFromImage(img)

            for region, region_data in self._split_labels(img_data).items():
                region_img = sitk.GetImageFromArray(region_data)
                # Copying the metadata of the original image to the new mask.
                region_img.CopyInformation(img)
                sitk.WriteImage(
                    region_img,
                    os.path.join(
                        patient_dir, patient + "_seg_" + region + ".nii.gz"
                    ),
                )

    @staticmethod
    def _run_extraction(inputCSV, outputFilepath, params, logger):
        """Extract the features of every Image/Mask pair listed in ``inputCSV``.

        One row per pair is appended to ``outputFilepath``; the header is
        written before the first successful row of the run. A pair whose
        extraction fails is logged and skipped.
        """
        flists = []
        try:
            with open(inputCSV, "r") as csv_handle:
                cr = csv.DictReader(csv_handle, lineterminator="\n")
                flists = [row for row in cr]
        except Exception:
            logger.error("CSV READ FAILED", exc_info=True)

        logger.info("Loading Done")
        logger.info("Patients: %d", len(flists))

        if os.path.isfile(params):
            extractor = featureextractor.RadiomicsFeatureExtractor(params)
        else:
            # Parameter file not found, use hard-coded settings instead.
            settings = {"geometryTolerance": 1}
            extractor = featureextractor.RadiomicsFeatureExtractor(**settings)

        headers = None

        for idx, entry in enumerate(flists, start=1):
            logger.info(
                "(%d/%d) Processing Patient (Image: %s, Mask: %s)",
                idx,
                len(flists),
                entry["Image"],
                entry["Mask"],
            )

            imageFilepath = entry["Image"]
            maskFilepath = entry["Mask"]

            # An optional "Label" column selects the mask label; anything that
            # is not a plain non-negative integer is ignored.
            label = entry.get("Label", None)
            label = int(label) if str(label).isdigit() else None

            if imageFilepath is None or maskFilepath is None:
                continue

            featureVector = collections.OrderedDict(entry)
            featureVector["Image"] = os.path.basename(imageFilepath)
            featureVector["Mask"] = os.path.basename(maskFilepath)

            try:
                featureVector.update(
                    extractor.execute(imageFilepath, maskFilepath, label)
                )

                # The file is re-opened for every entry so that finished rows
                # are already on disk if a later extraction aborts the run.
                with open(outputFilepath, "a") as outputFile:
                    writer = csv.writer(outputFile, lineterminator="\n")
                    if headers is None:
                        headers = list(featureVector.keys())
                        writer.writerow(headers)

                    writer.writerow([featureVector.get(h, "N/A") for h in headers])
            except Exception:
                logger.error("FEATURE EXTRACTION FAILED", exc_info=True)

    @staticmethod
    def feature_extraction(out_path=None):
        """Extract the radiomics features of the images listed in the CSV file.

        Uses ``radiomics_features_task1.csv`` (the file written by
        ``generate_csv``), which contains the paths of the images and of the
        different masks, and creates ``Results_radiomics_features_task1.csv``
        with all the features of each NIfTI image. Progress is logged to
        ``pyrad_log.txt``. When ``exampleSettings/Params.yaml`` exists it
        configures the extractor; otherwise hard-coded settings are used. All
        paths are relative to the output directory.

        Declared as a static method because it takes no ``self``; it can be
        called on the class as before and now also on an instance.

        Args:
            out_path: directory holding the input CSV and receiving the
                results. ``None`` keeps the built-in placeholder path.

        Note:
            The process working directory is changed to the output directory.
        """
        outPath = (
            r"...[path to save the pyradiomics results]"
            if out_path is None
            else out_path
        )
        os.chdir(outPath)

        filescsv = glob.glob("radiomics_features_task1.csv")

        for input_name in filescsv:
            inputCSV = os.path.join(outPath, input_name)
            outputFilepath = os.path.join(outPath, "Results_" + input_name)
            progress_filename = os.path.join(outPath, "pyrad_log.txt")
            params = os.path.join(outPath, "exampleSettings", "Params.yaml")

            # Configure logging: messages of the radiomics logger go to the
            # progress file.
            rLogger = logging.getLogger("radiomics")
            handler = logging.FileHandler(filename=progress_filename, mode="w")
            handler.setFormatter(
                logging.Formatter("%(levelname)s:%(name)s: %(message)s")
            )
            rLogger.addHandler(handler)

            try:
                # Logger for the batch log messages.
                logger = rLogger.getChild("batch")

                # Verbosity level for output to stderr.
                radiomics.setVerbosity(logging.INFO)

                logger.info("pyradiomics version: %s", radiomics.__version__)
                logger.info("Loading CSV")

                Pyradiomics._run_extraction(inputCSV, outputFilepath, params, logger)
            finally:
                # Detach and close the handler so that repeated calls neither
                # duplicate log lines nor leak the open log file.
                rLogger.removeHandler(handler)
                handler.close()

libraries_pyradiomics.py

The DataProcess class produces a clean dataset before any Machine Learning classification method is applied. numeric_data keeps only the columns with a numerical data type and removes the other, unusable types. Other functions, such as mean_conf and make_confusion_matrix, are used to compute and display the confusion matrix properly across the different nested-fold algorithms. The top_features functions and low_variance are used to select the best and most informative features.

1

" " "

2

l i b r a r i e s : data p r o c e s s i n g and a p p l y i n g ML a l g o r i t h m s

3

" " "

4

i m p o r t os

5

i m p o r t p a n d a s as pd

6

i m p o r t n u m p y as np

7

from s k l e a r n . tree i m p o r t D e c i s i o n T r e e C l a s s i f i e r

8

from s k l e a r n . m o d e l _ s e l e c t i o n i m p o r t K F o l d

9

from x g b o o s t i m p o r t X G B C l a s s i f i e r

10

from s k l e a r n . f e a t u r e _ s e l e c t i o n i m p o r t V a r i a n c e T h r e s h o l d

11

i m p o r t m a t p l o t l i b . p y p l o t as plt

12

from s k l e a r n . m o d e l _ s e l e c t i o n i m p o r t S t r a t i f i e d K F o l d

13

i m p o r t s e a b o r n as sns

14

from s k l e a r n . f e a t u r e _ s e l e c t i o n i m p o r t RFE

15

from s k l e a r n . f e a t u r e _ s e l e c t i o n i m p o r t R F E C V

16

17

" " " s e p a r a t e d _ e x t r a c t e d _ d a t a _ l i s t : list of s e p a r a t e d csv file n a m e s

18

s e p a r a t e d _ e x t r a c t e d _ d a t a _ p a t h : path of s e p a r a t e d csv file n a m e s

19

" " "

20

21

c l a s s D a t a P r o c e s s :

22

def _ _ i n i t _ _ (

23

self , s e p a r a t e d _ e x t r a c t e d _ d a t a _ l i s t , s e p a r a t e d _ e x t r a c t e d _ d a t a _ p a t h , s a v e _ p a t h

24

) :

25

26

self . s e p a r a t e d _ e x t r a c t e d _ d a t a _ l i s t = os . l i s t d i r (

27

r "... [ path to list of s e p a r a t e e d

csv f i l e s name ]"

28

)

29

self . s e p a r a t e d _ e x t r a c t e d _ d a t a _ p a t h = (

30

r " . . . [ path to d i r e c t o r y of the s e p a r a t e d csv f i l e s ]"

31

)

32

self . s a v e _ p a t h = r "... [ path to s t o r e the r e s u l t ]"

33

34

def n u m e r i c _ d a t a ( self , f i l e _ n a m e ) :

35

36

" " " << n u m e r i c _ d a t a > >: this f u n c t i o n help to e l i m i n a t e the c o l u m n s w h i c h does not

have

37

n u m e r i c type of data :

38

39

p a r a m e t e r - - > file ( csv )

40

r e t u r n - - > o u t p u t d a t a s e t with only n u m e r i c a l data type

41

" " "

42

43

d f _ r a w = pd . r e a d _ c s v ( self .

s e p a r a t e d _ e x t r a c t e d _ d a t a _ p a t h + f i l e _ n a m e )

44

45

# e x c l u d i n g the i m a g e IDs

46

df = d f _ r a w . drop ([ " I m a g e " ] , axis =1)

47

48

# e x c l u d i n g all the o b j e c t t y p e s of data + a d d i n g back the ID c o l u m n

49

d f _ n u m e r i c = df . s e l e c t _ d t y p e s ( e x c l u d e = "

o b j e c t " )

50

d f _ n u m e r i c . i n s e r t (0 , " I m a g e " , d f _ r a w [ "

I m a g e " ])

51

52

r e t u r n d f _ n u m e r i c

53

54

def low_variance(self, file_name):
    """Remove the low-variance features of an extracted-features CSV file.

    The numeric columns of the file are obtained with ``numeric_data``.
    Every feature whose variance does not exceed the threshold of 0.25
    (constant and quasi-constant features, which give little information
    about the data) is removed, and the result is saved as
    ``cleaned_<file_name>`` in ``self.save_path``.

    Args:
        file_name: name of the CSV file to clean.

    Returns:
        The cleaned DataFrame (``Image`` column plus the retained features).
    """
    # Getting the numerical data. The method has to be called on the
    # instance; DataProcess.numeric_data(file_name) passed the file name as
    # ``self`` and raised a TypeError.
    df_numeric = self.numeric_data(file_name)

    # The ID column is not a feature and must not reach the selector.
    features = df_numeric.drop(columns=["Image"])

    # Removing both constant and quasi-constant features.
    var_thr = VarianceThreshold(threshold=0.25)
    var_thr.fit(features)
    kept_columns = list(features.columns[var_thr.get_support()])

    df_cleaned = df_numeric[["Image"] + kept_columns]

    # Save the new updated dataset.
    df_cleaned.to_csv(os.path.join(self.save_path, "cleaned_" + file_name))

    return df_cleaned

78

79

def mean_conf(self, confusion_matrix):
    """Average a sequence of 2x2 confusion matrices element by element.

    Args:
        confusion_matrix: sequence of 2x2 confusion matrices, one for each
            k-subset of the cross validation.

    Returns:
        2x2 nested list holding, for every cell, the rounded mean of that
        cell over all the matrices.
    """
    cell_positions = ((0, 0), (0, 1), (1, 0), (1, 1))

    # One bucket per cell, filled with the value of that cell in every fold.
    buckets = {position: [] for position in cell_positions}
    for fold in confusion_matrix:
        for row, col in cell_positions:
            buckets[(row, col)].append(fold[row][col])

    # Rounded mean of every bucket, arranged back into a 2x2 layout.
    return [
        [round(np.mean(buckets[(row, col)])) for col in (0, 1)]
        for row in (0, 1)
    ]

100

101

def make_confusion_matrix(cf, group_names, categories, title, n_samples=559):
    """Plot a binary confusion matrix as an annotated Seaborn heatmap.

    The function takes no ``self``; when it is defined inside a class it has
    to be called through the class, not through an instance.

    Parameters
    ----------
    cf : numpy array
        confusion matrix to be plotted. The colour scale is fixed to the
        range 0..1 and the counts are computed as ``n_samples * value``.
        NOTE(review): this suggests that ``cf`` holds fractions of the
        dataset rather than counts - confirm against the caller.
    group_names : list of str
        labels, row by row, to be shown in each square.
    categories : list of str
        categories to be displayed on the y axis.
    title : str
        title for the heatmap.
    n_samples : int
        number of samples used to turn each cell value into a count.
        The default of 559 is the value that used to be hard-coded.

    Returns
    -------
    None
    """

    def safe_ratio(numerator, denominator):
        # A zero denominator (e.g. no predicted positives) yields 0.0
        # instead of a NaN and a runtime warning.
        return numerator / denominator if denominator else 0.0

    group_labels = ["{}\n".format(value) for value in group_names]

    group_percentages = [
        "{:.2f}".format(value) for value in cf.flatten() / np.sum(cf)
    ]
    group_counts = [f"{round(n_samples * value)}\n" for value in cf.flatten()]

    box_labels = [
        f"{v1}{v2}{v3}".strip()
        for v1, v2, v3 in zip(group_labels, group_counts, group_percentages)
    ]
    box_labels = np.asarray(box_labels).reshape(cf.shape[0], cf.shape[1])

    # Accuracy is the sum of the diagonal divided by the total observations.
    accuracy = safe_ratio(np.trace(cf), float(np.sum(cf)))

    # Metrics for binary confusion matrices.
    precision = safe_ratio(cf[1, 1], np.sum(cf[:, 1]))
    recall = safe_ratio(cf[1, 1], np.sum(cf[1, :]))
    f1_score = safe_ratio(2 * precision * recall, precision + recall)
    stats_text = (
        "\n\nAccuracy={:0.3f}\nPrecision={:0.3f}\nRecall={:0.3f}\nF1 Score={:0.3f}"
    ).format(accuracy, precision, recall, f1_score)

    # Make the heatmap visualization.
    plt.figure(figsize=None)
    sns.heatmap(
        cf,
        annot=box_labels,
        fmt="",
        cmap="Blues",
        cbar=True,
        xticklabels="auto",
        yticklabels=categories,
        vmin=0,
        vmax=1,
    )

    plt.ylabel("True label")
    plt.xlabel("Predicted label" + stats_text)

    plt.title(title)

162

163

def correlation_map(df, list_im_feat, save_path=None, title=None):
    """
    Plot the correlation heatmap of a subset of dataframe columns.

    Parameters
    ----------
    df : DataFrame
        dataframe as an input
    list_im_feat : List
        set of feature names extracted from the dataframe (by feature
        selectors); only these columns are correlated
    save_path : str
        file path used to save the figure (default is None, i.e. the
        figure is not written to disk)
    title : str
        title for the heatmap (default is None, i.e. no title)

    Returns
    -------
    None.

    """
    # restrict the dataframe to the selected (top) features only
    selected_df = df[list_im_feat].copy()

    fig, ax = plt.subplots(figsize=(15, 12))

    # plotting correlation heatmap on the axes created above
    sns.heatmap(
        selected_df.corr(), cmap="YlGnBu", annot=True, vmin=-1, vmax=1, ax=ax
    )

    # title and saving are independent options: previously both had to be
    # given, otherwise neither the title nor the file was produced
    if title is not None:
        ax.set_title(title)
    if save_path is not None:
        fig.savefig(save_path, dpi=300, bbox_inches="tight")

    # displaying heatmap
    plt.show()

197

198

def top_features_XGB(self, df, number_feat, target_var):
    """
    Rank features with an XGBoost classifier over a 5-fold split and
    return the names of the top-ranked ones.

    A classifier is fitted on the training part of every fold and the
    feature importances are averaged over the folds, so the ranking does
    not depend on a single (the last) fold only. Features whose averaged
    importance is zero are discarded before ranking.

    Parameters
    ----------
    df : DataFrame
        dataframe as an input (features plus the target column)
    number_feat : int
        number of features needed to be ranked
    target_var : str
        target variable (name of the target column in ``df``)

    Returns
    -------
    im_feat : list
        names of at most ``number_feat`` top-ranked features, best first;
        shorter when fewer features have a non-zero importance
    """
    # defining the target value and separate it
    y = df[target_var]
    X = df.drop([target_var], axis=1)
    feat_names = list(X.columns)

    # declare parameters
    params = {
        "objective": "binary:logistic",
        "max_depth": 4,
        "alpha": 10,
        "learning_rate": 1.0,
        "n_estimators": 100,
    }

    # accumulate the importance of every feature over the folds
    total_importance = np.zeros(len(feat_names))
    kf = KFold(n_splits=5, shuffle=True)
    for train_index, _ in kf.split(X):
        # instantiate the classifier and fit it to the training part
        xgb_clf = XGBClassifier(**params)
        xgb_clf.fit(X.iloc[train_index, :], y.iloc[train_index])
        total_importance += xgb_clf.feature_importances_
    mean_importance = total_importance / kf.get_n_splits()

    # feature_name -> averaged importance, zero-importance features removed
    feats = {
        name: score
        for name, score in zip(feat_names, mean_importance)
        if score != 0
    }

    # sort the features based on their importance (ties keep column order)
    ranked = sorted(feats.items(), key=lambda pair: pair[1], reverse=True)

    # the list of the top ranked feature names
    im_feat = [name for name, _ in ranked[:number_feat]]
    return im_feat

263

264

def top_features_RFE(self, df, number_feat, target_var):
    """
    Select features with Recursive Feature Elimination (decision-tree
    estimator) and return their names ordered by importance.

    Parameter
    ---------
    df : DataFrame
        dataframe as an input
    number_feat : int
        number of features needed to be ranked
    target_var : str
        target variable

    Return
    ------
    best_feat : list
        list of top features' name

    """
    # split the frame into target column and feature columns
    target = df[target_var]
    features = df.drop([target_var], axis=1)

    selector = RFE(
        estimator=DecisionTreeClassifier(),
        step=1,
        n_features_to_select=number_feat,
    )
    selector.fit(features, target)

    # importances of the features kept by RFE (one value per kept feature)
    importances = selector.estimator_.feature_importances_

    # positions among the kept features, most important first
    order = sorted(
        range(len(importances)), key=lambda pos: importances[pos], reverse=True
    )

    # column indices (in the input frame) of the kept features
    kept_columns = selector.get_support(indices=True)

    # translate ranking positions into column names
    best_feat = [features.columns[int(kept_columns[pos])] for pos in order]

    return best_feat[:number_feat]

314

315

def top_features_RFECV(self, df, number_feat, target_var):
    """
    Select features with Recursive Feature Elimination inside a
    cross-validation loop (which picks the number of features to keep)
    and return their names ordered by importance.

    Parameter
    ---------
    df : DataFrame
        dataframe as an input
    number_feat : int
        number of features needed to be ranked
    target_var : str
        target variable

    Return
    ------
    best_feat : list
        list of top features' name

    """
    # split the frame into target column and feature columns
    target = df[target_var]
    features = df.drop([target_var], axis=1)

    # define RFECV
    selector = RFECV(
        estimator=DecisionTreeClassifier(),
        cv=StratifiedKFold(5),
        scoring="accuracy",
        min_features_to_select=number_feat,
    )
    selector.fit(features, target)

    # importances of the features kept by RFECV (one value per kept feature)
    importances = selector.estimator_.feature_importances_

    # positions among the kept features, most important first
    order = sorted(
        range(len(importances)), key=lambda pos: importances[pos], reverse=True
    )

    # column indices (in the input frame) of the kept features
    kept_columns = selector.get_support(indices=True)

    # translate ranking positions into column names
    best_feat = [features.columns[int(kept_columns[pos])] for pos in order]

    return best_feat[:number_feat]

libraries_data.py

1

# %% main part
"""Evaluation of four classifiers on the top XGBoost-ranked features.

Steps performed by this script:
  1. rank the features with ``top_features_XGB`` and keep the 20 best;
  2. split the dataset into train (80%) and held-out test (20%);
  3. for every feature count i = 20 .. 1 run a 10-fold outer loop in which
     KNN, SVM, logistic regression and an MLP are tuned by a 5-fold
     GridSearchCV on the min-max scaled training part of the fold;
  4. store mean/std of accuracy and F1 plus the averaged confusion matrix,
     both for the training parts and for the held-out test set, together
     with the best hyper-parameters, and write everything to CSV.
"""
N_TOP_FEATURES = 20
TARGET = "MGMT_value"

# short tag used in the metric column names, in evaluation order
MODEL_TAGS = ("NN", "SVM", "LR", "MLP")

# column of the `parameters` table that holds each model's best parameters
PARAMETER_COLUMNS = {
    "NN": "Nearest Neighbor",
    "SVM": "Support Vector Machine",
    "LR": "Logistic Regresion",
    "MLP": "Multi-layer Perceptron",
}

STAT_NAMES = (
    "mean_accuracy",
    "std_accuracy",
    "mean_f1score",
    "std_f1score",
    "confusion",
)

# columns of the train/test metric tables
metric_columns = ["features_number"] + [
    f"{stat}_{tag}" for tag in MODEL_TAGS for stat in STAT_NAMES
]

# columns of the best-parameter table
parameter_columns = (
    ["features_number"]
    + [PARAMETER_COLUMNS[tag] for tag in MODEL_TAGS]
    + ["Best Selected Features"]
)


def _candidate_models():
    """Return {tag: (estimator, parameter grid)} for the four classifiers."""
    return {
        "NN": (
            KNeighborsClassifier(),
            {
                "n_neighbors": [3, 5, 11, 19],
                # was "ubiform": not a valid value, so half of the grid
                # could never be fitted
                "weights": ["uniform", "distance"],
                "metric": ["euclidean", "manhattan"],
            },
        ),
        "SVM": (
            SVC(),
            {
                "C": [0.1, 1, 10, 100, 1000],
                "gamma": [1, 0.1, 0.01, 0.001, 0.0001],
                "kernel": ["rbf", "poly", "sigmoid"],
            },
        ),
        "LR": (
            LogisticRegression(),
            # NOTE(review): several penalty/solver pairs of this grid are not
            # supported by LogisticRegression; with GridSearchCV's default
            # error_score they are presumably scored NaN and never selected
            # -- confirm against the installed scikit-learn version.
            {
                "penalty": ["none", "l1", "l2", "elasticnet"],
                "C": [0.001, 0.009, 0.01, 0.09, 1, 5, 10, 25, 50, 75, 100],
                "solver": ["newton-cg", "lbfgs", "liblinear"],
            },
        ),
        "MLP": (
            MLPClassifier(
                solver="lbfgs",
                alpha=1e-5,
                hidden_layer_sizes=(6,),
                random_state=1,
            ),
            {
                "batch_size": [256],
                "momentum": [0.9, 0.99],
                "learning_rate_init": [0.001, 0.01, 0.1],
                "solver": ["adam"],
                "alpha": [0.0001, 0.05],
                "learning_rate": ["constant", "adaptive"],
            },
        ),
    }


def _empty_store():
    """Return one empty result list per model tag."""
    return {tag: [] for tag in MODEL_TAGS}


def _record(model, X_eval, y_eval, tag, acc, f1, conf):
    """Predict on (X_eval, y_eval) and append accuracy, F1 and confusion."""
    y_pred = model.predict(X_eval)
    acc[tag].append(accuracy_score(y_eval, y_pred))
    conf[tag].append(confusion_matrix(y_eval, y_pred, normalize="all"))
    f1[tag].append(f1_score(y_eval, y_pred))


def _summary_row(n_features, acc, f1, conf):
    """Build one metric-table row from the per-fold results of all models."""
    row = {"features_number": f"{n_features}"}
    for tag in MODEL_TAGS:
        row[f"mean_accuracy_{tag}"] = np.mean(acc[tag])
        row[f"std_accuracy_{tag}"] = np.std(acc[tag])
        row[f"mean_f1score_{tag}"] = np.mean(f1[tag])
        row[f"std_f1score_{tag}"] = np.std(f1[tag])
        row[f"confusion_{tag}"] = mean_conf(conf[tag])
    return row


# reading the dataset
df = pd.read_csv("/content/drive/MyDrive/data/all_best_data.csv")

# the saved index column is not a feature: remove it BEFORE ranking, otherwise
# it can be ranked as a top feature and X[list_im_feat] below raises KeyError
feature_df = df.drop(["Unnamed: 0"], axis=1)

# getting the top features
# NOTE(review): the visible definition of top_features_XGB takes `self` as
# first parameter; the call is kept exactly as in the original script.
list_im_feat = top_features_XGB(feature_df, N_TOP_FEATURES, TARGET)
print("check here:", list_im_feat)

# defining the target value and separate it
y = df[TARGET]
X = feature_df.drop([TARGET], axis=1)

# dataset with best features
X = X[list_im_feat].copy()

# splitting the whole dataset into train (80%) and test (20%)
X_tr, X_ts, y_tr, y_ts = train_test_split(X, y, test_size=0.2, random_state=0)

# applying k-fold cross validation (K=10) --> outer loop
cv_outer = KFold(n_splits=10, shuffle=True)

# rows of the result tables, one per number of features
train_rows, test_rows, parameter_rows = [], [], []

# iteration over number of features i (20, 19, ..., 1)
for i in range(N_TOP_FEATURES, 0, -1):
    # keep the i top ranked features
    top_i = list_im_feat[:i]
    X_ts_i = X_ts[top_i]

    # performance metrics for training and test, best parameters
    acc_tr, f1_tr, conf_tr = _empty_store(), _empty_store(), _empty_store()
    acc_ts, f1_ts, conf_ts = _empty_store(), _empty_store(), _empty_store()
    best_par = _empty_store()

    # NOTE(review): as in the original script the held-out part of each outer
    # fold is not scored; the "train" metrics are computed on the data the
    # model was fitted on.
    for train_index, _ in cv_outer.split(X_tr):
        X_train = X_tr.iloc[train_index][top_i]
        y_train = y_tr.iloc[train_index]

        # configuring the cross-validation procedure (inner loop)
        cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)

        # fit the scaler on the training part only and reuse it for the test
        # set, so both are mapped with the same minimum/maximum
        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_ts_scaled = scaler.transform(X_ts_i)

        for tag, (estimator, grid) in _candidate_models().items():
            # define and execute the search
            search = GridSearchCV(
                estimator, grid, scoring="accuracy", cv=cv_inner, refit=True
            )
            search.fit(X_train_scaled, y_train)

            # best model refitted on the whole training part + its parameters
            best_model = search.best_estimator_
            best_par[tag].append(search.best_params_)

            # performance on the training part and on the held-out test set
            _record(best_model, X_train_scaled, y_train, tag,
                    acc_tr, f1_tr, conf_tr)
            _record(best_model, X_ts_scaled, y_ts, tag,
                    acc_ts, f1_ts, conf_ts)

    # storing result of evaluation metrics for further analysis
    train_rows.append(_summary_row(i, acc_tr, f1_tr, conf_tr))
    test_rows.append(_summary_row(i, acc_ts, f1_ts, conf_ts))

    # store best parameters
    par_to_store = {"features_number": f"{i}"}
    for tag in MODEL_TAGS:
        par_to_store[PARAMETER_COLUMNS[tag]] = best_par[tag]
    par_to_store["Best Selected Features"] = top_i
    parameter_rows.append(par_to_store)

# DataFrame.append was removed in pandas 2.0: build the tables in one go
train_metrics = pd.DataFrame(train_rows, columns=metric_columns)
test_metrics = pd.DataFrame(test_rows, columns=metric_columns)
parameters = pd.DataFrame(parameter_rows, columns=parameter_columns)

# save the data as a csv file
train_metrics.to_csv("... [path to save training result]")
test_metrics.to_csv("... [path to save tst result]")
parameters.to_csv("... [path to save best hypreparametre]")

libraries_result.py

Documenti correlati