dicom のタグ情報を mysql に書き込む python

python で読み込んだ dicom のタグ情報を mysql に書き込みます。

データベースとテーブルの作成

新たにデータベースを作成してタグ情報を書き込むテーブルを作成します。


mysql> create database pydcmdb;

テーブル作成。


-- One row per study. The Python script below fills this table with
-- "REPLACE INTO taginfo VALUES (...)", supplying 12 values in exactly this
-- column order, so the column order here must not change independently.
CREATE TABLE `taginfo` (
  -- Primary key. The script writes the DICOM tag (0020,0010) value here, so
  -- a REPLACE INTO for an already-stored study overwrites that row.
  -- NOTE(review): the column is int but the script passes the tag value
  -- as read from the file -- confirm the values are always numeric.
  `studyID` int AUTO_INCREMENT,
  -- Tag (0008,0020).
  `studyDate` varchar(20) NOT NULL,
  -- Tag (0008,0030).
  `studyTime` varchar(30) NULL,
  -- Tag (0008,0060).
  `modality` varchar(30) NOT NULL,
  -- Tag (0008,1030); the script stores '' when the tag is absent.
  `studyDscr` varchar(30) DEFAULT NULL,
  -- Tag (0010,0010) with '^' separators replaced by spaces.
  `ptName` varchar(100) NOT NULL,
  -- Tag (0010,0020).
  `karteNo` varchar(30) NOT NULL,
  -- Tag (0010,0030).
  `birthday` varchar(20) NOT NULL,
  -- Tag (0010,0040).
  `sex` varchar(20) NOT NULL,
  -- Tag (0010,1010).
  `age` varchar(20) NOT NULL,
  -- Tag (0008,0080).
  `institution` varchar(20) NULL,
  -- Built by the script as '<YYYY/MM/DD>/<karteNo>/<studyID>'.
  -- NOTE(review): with a 30-character karteNo this can exceed 50
  -- characters -- confirm the width is sufficient for real data.
  `path` varchar(50) NOT NULL, 
  PRIMARY KEY(studyID)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

データベースにタグ情報を書き込む


import pydicom
import os
import re
import pymysql

class Dicom:
    """Collect tag information from DICOM files and store it in MySQL.

    Typical flow: ``getDicomArr`` (list files) -> ``getDicomInfo`` (read one
    row of tags per study) -> ``insertDB`` (write the rows to ``taginfo``).
    """

    # Date-shaped path fragment (YYYY/MM/DD) looked up by ``getBdir``.
    # Raw string so that ``\d`` is not treated as a string escape.
    _BDIR_PATTERN = re.compile(r'\d{4}/\d{2}/\d{2}')

    def getDicomArr(self, sd):
        """Return the full paths of all files found under directory *sd*.

        The tree is walked recursively with ``os.walk``; every file is
        included regardless of its extension.

        Args:
            sd: Root directory to scan.

        Returns:
            A list of path strings (empty when nothing is found).
        """
        return [
            os.path.join(root, file_)
            for root, _dirs, files in os.walk(sd)
            for file_ in files
        ]

    @classmethod
    def getBdir(cls, ef):
        """Return the first ``YYYY/MM/DD`` fragment contained in path *ef*.

        Args:
            ef: File path expected to contain a date-shaped directory part.

        Returns:
            The matched fragment, e.g. ``'2021/11/12'``.

        Raises:
            ValueError: If *ef* contains no ``YYYY/MM/DD`` fragment.
        """
        res = cls._BDIR_PATTERN.search(ef)
        if res is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "no YYYY/MM/DD directory component found in path: %r" % (ef,)
            )
        return res.group()

    def getDicomInfo(self, dcmArr):
        """Read the files in *dcmArr* and return one row of tags per study.

        Only the first file seen for each Study ID (0020,0010) produces a
        row; later files of the same study are skipped.

        Args:
            dcmArr: Iterable of DICOM file paths.

        Returns:
            A list of rows. Each row is a list ordered as
            ``[studyID, studyDate, studyTime, modality, studyDscr, ptName,
            karteNo, birthday, sex, age, institution, path]``, which matches
            the 12 placeholders used by ``insertDB``.

        Raises:
            KeyError: If a file lacks any of the tags read here, except
                Study Description (0008,1030), which falls back to ``''``.
            ValueError: If a file path has no ``YYYY/MM/DD`` fragment.
        """
        dcmInfoArr = []
        seenStudyIDs = set()  # O(1) membership test per file
        for eachFile in dcmArr:
            # Only header tags are used, so pixel data need not be loaded.
            ds = pydicom.dcmread(eachFile, stop_before_pixels=True)
            studyID = ds[0x0020, 0x0010].value
            if studyID in seenStudyIDs:
                continue
            studyDate = ds[0x0008, 0x0020].value
            studyTime = ds[0x0008, 0x0030].value
            modality = ds[0x0008, 0x0060].value
            try:
                studyDscr = ds[0x0008, 0x1030].value
            except KeyError:
                # Study Description is optional; store an empty string.
                studyDscr = ''
            # Person-name components are '^'-separated; store them spaced.
            ptName = str(ds[0x0010, 0x0010].value).replace('^', ' ')
            karteNo = ds[0x0010, 0x0020].value
            sex = ds[0x0010, 0x0040].value
            birthday = ds[0x0010, 0x0030].value
            age = ds[0x0010, 0x1010].value
            institution = ds[0x0008, 0x0080].value
            bdir = Dicom.getBdir(eachFile)
            path = bdir + '/' + karteNo + '/' + studyID
            dcmInfoArr.append([
                studyID, studyDate, studyTime, modality, studyDscr, ptName,
                karteNo, birthday, sex, age, institution, path,
            ])
            seenStudyIDs.add(studyID)
        return dcmInfoArr

    def insertDB(self, DicomInfoArr):
        """Write the rows in *DicomInfoArr* to the ``taginfo`` table.

        All rows are sent with a single ``executemany`` call using
        ``REPLACE INTO`` and committed together. The connection is always
        closed, even when the write fails.

        Args:
            DicomInfoArr: Sequence of 12-value rows as produced by
                ``getDicomInfo``. When empty, nothing is done and no
                database connection is opened.
        """
        if not DicomInfoArr:
            return
        # NOTE: connection settings are hard-coded; adjust them (or load
        # them from configuration) for any environment other than a local
        # test setup.
        conn = pymysql.connect(
            host='localhost',
            user='root',
            database='pydcmdb',
            password='password',
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor,
        )
        try:
            with conn.cursor() as cursor:
                sql = "REPLACE INTO taginfo VALUES ( %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s )"
                cursor.executemany(sql, DicomInfoArr)
            conn.commit()
        finally:
            conn.close()

if __name__ == "__main__":
    # Scan one day's directory, extract one row of tags per study, and
    # store the rows in MySQL.
    importer = Dicom()
    file_paths = importer.getDicomArr('/var/www/html/DICOM/2021/11/12')
    importer.insertDB(importer.getDicomInfo(file_paths))

検査ID (studyID) を primary key にしているため、REPLACE INTO で書き込むと、同じ検査IDの行が既に存在する場合はその行が上書きされます。

python (pymysql) の便利なところは、executemany で大きな2次元配列を一度にまとめて書き込むことができる点です。