長い文字列から、「2021/11/12」などの年月日文字列を抽出します。
import re
# Sample path whose directory layout embeds a date as YYYY/MM/DD.
s = '/var/www/html/DICOM/2021/11/12/64D94C0D/A8BA92DA/5B04B6B8'
# Raw string: in a plain literal "\d" is an invalid escape sequence and
# triggers a warning on recent Python versions. The pattern value is the same.
pattern = r'\d{4}/\d{2}/\d{2}'
# re.search scans the whole string and returns the first match.
res = re.search(pattern, s)
print(res.group())
結果は
2021/11/12
DICOM ファイルの保存先のパスを作成します。
import pydicom
import os
import re
class Dicom:
    """Helpers for listing DICOM files and summarising them per study."""

    # Date directory fragment ``YYYY/MM/DD`` with a two-digit month and day.
    # One-digit months/days (e.g. ``2021/11/6``) do not match this pattern.
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    _DATE_DIR_RE = re.compile(r'\d{4}/\d{2}/\d{2}')

    def getDicomArr(self, sd):
        """Return the full paths of all files below directory *sd*.

        The tree is walked recursively with ``os.walk``; every file is
        included regardless of its name or extension.

        Args:
            sd: Root directory to scan.

        Returns:
            list[str]: One ``os.path.join(root, file)`` path per file found.
        """
        paths = []
        # The sub-directory list is not needed, hence the throwaway name
        # (which also avoids shadowing the builtin ``dir``).
        for root, _dirs, names in os.walk(sd):
            paths.extend(os.path.join(root, name) for name in names)
        return paths

    @classmethod
    def getBdir(cls, ef):
        """Return the first ``YYYY/MM/DD`` fragment contained in path *ef*.

        Args:
            ef: File path expected to contain a date directory such as
                ``2021/11/12``.

        Returns:
            str: The matched date fragment, e.g. ``'2021/11/12'``.

        Raises:
            ValueError: If *ef* contains no fragment matching the pattern.
        """
        match = cls._DATE_DIR_RE.search(ef)
        if match is None:
            # Fail with a message naming the path instead of an opaque
            # AttributeError on ``None.group()``.
            raise ValueError(f'no YYYY/MM/DD date directory in path: {ef!r}')
        return match.group()

    def getDicomInfo(self, dcmArr):
        """Build one summary row per distinct study ID found in *dcmArr*.

        Each file is read with pydicom. The first file seen for a given
        study ID (tag 0020,0010) produces a row; later files carrying the
        same study ID are skipped.

        Args:
            dcmArr: Iterable of DICOM file paths. Each path must contain a
                date directory accepted by ``getBdir``.

        Returns:
            list[list]: Rows of ``[studyID, studyDate, studyTime, modality,
            studyDscr, ptName, karteNo, birthday, sex, age, institution,
            path]`` where ``path`` is ``'<date dir>/<karteNo>/<studyID>'``.

        Raises:
            KeyError: If a file lacks any of the tags read here other than
                the study description (0008,1030), which defaults to ``''``.
            ValueError: If a file path contains no date directory.
        """
        rows = []
        seen_study_ids = set()  # O(1) membership test for de-duplication
        for each_file in dcmArr:
            # Only header elements are used below, so pixel data is skipped.
            ds = pydicom.dcmread(each_file, stop_before_pixels=True)
            studyID = ds[0x0020, 0x0010].value
            if studyID in seen_study_ids:
                continue

            studyDate = ds[0x0008, 0x0020].value
            studyTime = ds[0x0008, 0x0030].value
            modality = ds[0x0008, 0x0060].value
            try:
                studyDscr = ds[0x0008, 0x1030].value
            except KeyError:
                # The description tag is treated as optional.
                studyDscr = ''
            # '^' separates the name components; show them space-separated.
            ptName = str(ds[0x0010, 0x0010].value).replace('^', ' ')
            karteNo = ds[0x0010, 0x0020].value
            sex = ds[0x0010, 0x0040].value
            birthday = ds[0x0010, 0x0030].value
            age = ds[0x0010, 0x1010].value
            institution = ds[0x0008, 0x0080].value

            bdir = self.getBdir(each_file)
            path = f'{bdir}/{karteNo}/{studyID}'

            rows.append([
                studyID, studyDate, studyTime, modality, studyDscr, ptName,
                karteNo, birthday, sex, age, institution, path,
            ])
            seen_study_ids.add(studyID)
        return rows
if __name__ == "__main__":
    # Scan one day's directory and print one summary row per study.
    scanner = Dicom()
    infos = scanner.getDicomInfo(
        scanner.getDicomArr('/var/www/html/DICOM/2021/11/12')
    )
    print(infos)
infos には、検査(Study ID)ごとに1件ずつ、以下のような情報のリストが入っています。(見やすいように1件ずつ改行して示しています。氏名と病院名は表示していません)
['9482', '20200129', '152540', 'CT', 'Abdomen', '21902405', '19870506', 'F', '032Y', '2021/11/12/21902405/9482']
['9464', '20200125', '142126', 'CT', 'Abdomen', '22000187', '19870910', 'M', '032Y', '2021/11/12/22000187/9464']
['9417', '20200110', '132943', 'CT', 'Chest', '21601458', '19331224', 'F', '086Y', '2021/11/12/21601458/9417']
['9463', '20200125', '094512', 'CT', 'Chest', '21800186', '19530405', 'M', '066Y', '2021/11/12/21800186/9463']
['9419', '20200110', '153217', 'CT', 'Chest', '21502808', '19411116', 'F', '078Y', '2021/11/12/21502808/9419']
['9468', '20200127', '095618', 'CT', 'Abdomen', '22000191', '19690206', 'M', '050Y', '2021/11/12/22000191/9468']
['9431', '20200115', '104327', 'CT', 'Abdomen', '21800138', '19950118', 'M', '024Y', '2021/11/12/21800138/9431']
['996', '20200123', '134853', 'RF', '', '20703388', '19531001', 'M', '066Y', '2021/11/12/20703388/996']
クラスメソッド
クラスメソッドにすると、インスタンスを作らなくても「クラス名.メソッド名()」(例: Dicom.getBdir(...))の形で呼び出せるので便利です。
@classmethod
def getBdir(cls, ef):
    """Return the first ``YYYY/MM/DD`` fragment contained in path *ef*.

    Args:
        ef: File path expected to contain a date directory such as
            ``2021/11/12`` (two-digit month and day).

    Returns:
        str: The matched date fragment, e.g. ``'2021/11/12'``.

    Raises:
        ValueError: If *ef* contains no fragment matching the pattern.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    pattern = r'\d{4}/\d{2}/\d{2}'
    res = re.search(pattern, ef)
    if res is None:
        # Name the offending path instead of failing on ``None.group()``.
        raise ValueError(f'no YYYY/MM/DD date directory in path: {ef!r}')
    return res.group()
正規表現の訂正
年月日には「2021/11/6」のように月や日が1桁になるものもあり、上の正規表現(月・日とも \d{2})ではマッチしないためエラーが出ます。
月と日は数字が1桁もしくは2桁なので、正規表現を次のように訂正します。
# Month and day may be one or two digits. Raw string keeps "\d" intact
# (a plain literal would contain an invalid escape sequence).
pattern = r'\d{4}/\d{1,2}/\d{1,2}'