FY2526-SW-PoC-APIRelay/FY2526-SW-PoC-APIRelay/GetDataFromFile.cs

using ImageMagick;
using InfoExtraction;
using Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop.PowerPoint;
using Microsoft.Office.Interop.Word;
using PdfiumViewer;
using System.Text;
using System.Text.RegularExpressions;
using Windows.Graphics.Imaging;
using Windows.Media.Ocr;
using Windows.Storage;
using ExcelApplication = Microsoft.Office.Interop.Excel.Application;
using PowerPointApplication = Microsoft.Office.Interop.PowerPoint.Application;
using WordApplication = Microsoft.Office.Interop.Word.Application;

namespace FY2526_SW_PoC_APIRelay
{
    internal class GetDataFromFile
    {
        /// <summary>
        /// Runs Japanese OCR on an image file and returns the recognized text.
        /// A HEIC input is first converted to a temporary JPEG, which is removed again
        /// before this method returns, whether or not recognition succeeded.
        /// </summary>
        /// <param name="filePath">Path of the image file to read.</param>
        /// <returns>
        /// The recognized lines joined with <see cref="Environment.NewLine"/>, or <c>null</c>
        /// when conversion, engine creation or recognition fails (the failure is logged).
        /// </returns>
        public static async Task<string?> Ocr(string filePath)
        {
            // Set only when a HEIC file was converted; used by the finally block for clean-up.
            string? convertedFilePath = null;

            try
            {
                // Ordinal comparison: a file extension is an identifier, not linguistic text.
                if (string.Equals(Path.GetExtension(filePath), ".heic", StringComparison.OrdinalIgnoreCase))
                {
                    try
                    {
                        convertedFilePath = ConvertHeicToJpeg(filePath);
                        if (convertedFilePath == null)
                        {
                            LogWriter.WriteLog("HEICファイルの変換に失敗しました", LogWriter.LogLevel.ERROR);
                            return null;
                        }
                        filePath = convertedFilePath;
                    }
                    catch (Exception conversionException)
                    {
                        LogWriter.WriteLog($"HEICからJPEGへの変換中にエラーが発生しました: {conversionException.Message}", LogWriter.LogLevel.ERROR);
                        return null;
                    }
                }

                // Open the file and decode it into a bitmap. Both the stream and the bitmap are
                // disposed when this try block is left, i.e. before the finally block deletes
                // the temporary JPEG.
                var file = await StorageFile.GetFileFromPathAsync(filePath);
                using var stream = await file.OpenAsync(FileAccessMode.Read);
                var decoder = await BitmapDecoder.CreateAsync(stream);
                using var bmp = await decoder.GetSoftwareBitmapAsync();

                try
                {
                    var engine = OcrEngine.TryCreateFromLanguage(new Windows.Globalization.Language("ja"));
                    if (engine == null)
                    {
                        LogWriter.WriteLog("OCRエンジンの作成に失敗しました", LogWriter.LogLevel.ERROR);
                        return null;
                    }

                    var result = await engine.RecognizeAsync(bmp);

                    // Take the text of every recognized line and join the lines with newlines.
                    return string.Join(Environment.NewLine, result.Lines.Select(line => line.Text));
                }
                catch (Exception ocrException)
                {
                    LogWriter.WriteLog($"OCR認識に失敗しました: {ocrException.Message}", LogWriter.LogLevel.ERROR);
                    return null;
                }
            }
            catch (Exception e)
            {
                LogWriter.WriteLog($"予期しないエラーが発生しました: {e.Message}", LogWriter.LogLevel.ERROR);
                return null;
            }
            finally
            {
                // Remove the temporary JPEG on every exit path; a failed delete is only logged.
                if (convertedFilePath != null && File.Exists(convertedFilePath))
                {
                    try
                    {
                        File.Delete(convertedFilePath);
                    }
                    catch (Exception deleteException)
                    {
                        LogWriter.WriteLog($"JPEGファイルの削除に失敗しました: {deleteException.Message}", LogWriter.LogLevel.ERROR);
                    }
                }
            }
        }

        /// <summary>Converts a HEIC image to a JPEG file.</summary>
        /// <param name="heicFilePath">Path of the HEIC file to convert.</param>
        /// <returns>
        /// Path of the JPEG that was written, or <c>null</c> when the conversion fails
        /// (the failure is logged). The JPEG is created under a unique name in the temp
        /// directory so that no file next to the source image is ever overwritten.
        /// </returns>
        private static string? ConvertHeicToJpeg(string heicFilePath)
        {
            try
            {
                // A unique temp-directory name avoids clobbering an existing "<name>.jpg" beside the
                // source and avoids rewriting ".heic" occurrences inside directory names.
                string outputFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".jpg");

                // Convert the HEIC file to JPEG with Magick.NET.
                using (var image = new MagickImage(heicFilePath))
                {
                    // Save as JPEG.
                    image.Format = MagickFormat.Jpeg;
                    image.Write(outputFilePath);
                }

                return outputFilePath;
            }
            catch (Exception e)
            {
                LogWriter.WriteLog($"HEICファイルの変換中にエラーが発生しました: {e.Message}", LogWriter.LogLevel.ERROR);
                return null; // Conversion failed.
            }
        }

        /// <summary>
        /// Extracts the text of a document file. Supported extensions are .txt, .pdf,
        /// .doc/.docx, .xls/.xlsx and .ppt/.pptx; the Office formats are read through COM automation.
        /// </summary>
        /// <param name="filePath">Path of the document to read.</param>
        /// <returns>The extracted text; <c>null</c> for a PDF that has no pages.</returns>
        /// <exception cref="ArgumentException">The extension is not a supported document type.</exception>
        /// <remarks>Every failure is written to the console and the log, then rethrown.</remarks>
        public static string? ReadDocument(string filePath)
        {
            try
            {
                // Register the code-page provider so Encoding.GetEncoding can resolve the
                // Shift_JIS / EUC-JP / iso-2022-jp names used by GetEncoding.
                Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
                string? result = null;

                switch (Path.GetExtension(filePath).ToLowerInvariant())
                {
                    case ".txt":
                        byte[] firstData = new byte[1000];
                        // Pre-fill bytes 2-3 with non-zero values so that a file holding nothing but a
                        // UTF-16 BOM is not mistaken for UTF-32 LE (FF FE 00 00) by GetEncoding.
                        firstData[2] = 0xFF;
                        firstData[3] = 0xFF;
                        int readCount;
                        // Read-only access: the FileMode-only constructor would request read/write
                        // access and fail on read-only files.
                        using (FileStream fs = new(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
                        {
                            readCount = fs.Read(firstData, 0, firstData.Length);
                        }

                        using (StreamReader sr = new(filePath, GetEncoding(firstData, readCount)))
                        {
                            result = sr.ReadToEnd();
                        }
                        break;
                    case ".pdf":
                        using (PdfDocument doc = PdfDocument.Load(filePath))
                        {
                            // A PDF without pages leaves result as null, as before.
                            if (doc.PageCount > 0)
                            {
                                var pdfText = new StringBuilder();
                                for (var pageNum = 0; pageNum < doc.PageCount; pageNum++)
                                {
                                    pdfText.Append(doc.GetPdfText(pageNum));
                                }
                                result = pdfText.ToString();
                            }
                        }
                        break;
                    case ".docx":
                    case ".doc":
                        // NOTE(review): the Word application is never asked to Quit here; presumably
                        // ComWrapper.Dispose takes care of shutting it down — confirm.
                        using (var word = new ComWrapper<WordApplication>(new WordApplication() { Visible = false, DisplayAlerts = WdAlertLevel.wdAlertsNone }))
                        using (var docs = new ComWrapper<Documents>(word.ComObject.Documents))
                        {
                            using var doc = new ComWrapper<Document>(docs.ComObject.Open(filePath,
                                        ReadOnly: true,
                                        AddToRecentFiles: false,
                                        Visible: false));
                            var tempFiles = new string[2];

                            bool success = false;
                            try
                            {
                                tempFiles[0] = Path.GetTempFileName();
                                tempFiles[1] = Path.GetTempFileName();

                                // Get the whole body text of the Word document.
                                string fullText = doc.ComObject.Content.Text;

                                // Save the body text to the first temp file.
                                File.WriteAllText(tempFiles[0], fullText, Encoding.UTF8);

                                // Text inside shapes/text boxes and comments is collected separately.
                                List<string> otherContents = new();

                                foreach (Microsoft.Office.Interop.Word.Shape shape in doc.ComObject.Shapes)
                                {
                                    // Extract the text of the shape.
                                    ExtractShapeContents(otherContents, shape);
                                }
                                foreach (Microsoft.Office.Interop.Word.Comment comment in doc.ComObject.Comments)
                                {
                                    // Store the comment as "author:text".
                                    otherContents.Add(comment.Author + ":" + comment.Range.Text);
                                }

                                // Save the remaining text to the second temp file.
                                File.WriteAllLines(tempFiles[1], otherContents, Encoding.UTF8);

                                success = true;
                            }
                            finally
                            {
                                try
                                {
                                    doc.ComObject.Close(false);
                                    if (success)
                                    {
                                        // Merge both temp files into the result.
                                        result = MergeTextContents(tempFiles);
                                    }
                                }
                                finally
                                {
                                    // Temp files are removed even when Close or the merge throws.
                                    DeleteFiles(tempFiles);
                                }
                            }
                        }
                        break;
                    case ".xlsx":
                    case ".xls":
                        using (var excel = new ComWrapper<ExcelApplication>(new ExcelApplication() { Visible = false, DisplayAlerts = false }))
                        using (var books = new ComWrapper<Workbooks>(excel.ComObject.Workbooks))
                        {
                            using var book = new ComWrapper<Workbook>(books.ComObject.Open(filePath,
                                    UpdateLinks: XlUpdateLinks.xlUpdateLinksNever,
                                    ReadOnly: true,
                                    IgnoreReadOnlyRecommended: true,
                                    Editable: false));

                            List<string> tempFiles = new();

                            bool success = false;
                            try
                            {
                                for (int i = 1; i <= book.ComObject.Worksheets.Count; i++)
                                {
                                    using var sheet = new ComWrapper<Worksheet>(book.ComObject.Worksheets[i]);

                                    var tempFile1 = Path.GetTempFileName();
                                    tempFiles.Add(tempFile1);

                                    // Collect the displayed text of every cell in the used range.
                                    List<string> sheetContents = new();
                                    foreach (Microsoft.Office.Interop.Excel.Range cell in sheet.ComObject.UsedRange)
                                    {
                                        sheetContents.Add(cell.Text.ToString());
                                    }

                                    // Save the cell text of this sheet to a temp file.
                                    File.WriteAllLines(tempFile1, sheetContents, Encoding.UTF8);

                                    // Collect text held by shapes.
                                    List<string> otherContents = new();
                                    foreach (Microsoft.Office.Interop.Excel.Shape shape in sheet.ComObject.Shapes)
                                    {
                                        ExtractShapesContents(otherContents, shape);
                                    }

                                    // Threaded comments, stored as "author:text".
                                    foreach (Microsoft.Office.Interop.Excel.CommentThreaded comment in sheet.ComObject.CommentsThreaded)
                                    {
                                        otherContents.Add(comment.Author.Name + ":" + comment.Text());
                                    }

                                    // Notes (legacy comments), stored as "author:text".
                                    foreach (Microsoft.Office.Interop.Excel.Comment memo in sheet.ComObject.Comments)
                                    {
                                        otherContents.Add(memo.Author + ":" + memo.Text());
                                    }

                                    var tempFile2 = Path.GetTempFileName();
                                    tempFiles.Add(tempFile2);
                                    File.WriteAllLines(tempFile2, otherContents, Encoding.UTF8);
                                }

                                success = true;
                            }
                            finally
                            {
                                try
                                {
                                    book.ComObject.Close(false);
                                    if (success)
                                    {
                                        // Merge all temp files, in the order they were written, into the result.
                                        result = MergeTextContents(tempFiles.ToArray());
                                    }
                                }
                                finally
                                {
                                    // Temp files are removed even when Close or the merge throws.
                                    DeleteFiles(tempFiles.ToArray());
                                }
                            }
                        }
                        break;
                    case ".pptx":
                    case ".ppt":
                        using (var powerPoint = new ComWrapper<PowerPointApplication>(new PowerPointApplication() { DisplayAlerts = PpAlertLevel.ppAlertsNone }))
                        using (var ppts = new ComWrapper<Presentations>(powerPoint.ComObject.Presentations))
                        {
                            using var ppt = new ComWrapper<Presentation>(ppts.ComObject.Open(filePath,
                                ReadOnly: Microsoft.Office.Core.MsoTriState.msoTrue,
                                WithWindow: Microsoft.Office.Core.MsoTriState.msoFalse));

                            var tempFiles = new string[1];
                            var success = false;
                            try
                            {
                                tempFiles[0] = Path.GetTempFileName();

                                // Collect shape text, comments and speaker notes of every slide.
                                var slideContents = new List<string>();
                                foreach (Slide slide in ppt.ComObject.Slides)
                                {
                                    foreach (Microsoft.Office.Interop.PowerPoint.Shape shape in slide.Shapes)
                                    {
                                        ExtractShapeContents(slideContents, shape);
                                    }
                                    foreach (Microsoft.Office.Interop.PowerPoint.Comment comment in slide.Comments)
                                    {
                                        slideContents.Add(comment.Author + ":" + comment.Text);
                                    }
                                    // Placeholders[2] is read as the notes text (Placeholders[1] is the slide itself, per the original note).
                                    slideContents.Add(slide.NotesPage.Shapes.Placeholders[2].TextFrame.TextRange.Text);
                                }
                                File.WriteAllLines(tempFiles[0], slideContents, Encoding.UTF8);
                                success = true;
                            }
                            finally
                            {
                                try
                                {
                                    ppt.ComObject.Close();
                                    if (success)
                                    {
                                        result = MergeTextContents(tempFiles);
                                    }
                                }
                                finally
                                {
                                    // Temp files are removed even when Close or the merge throws.
                                    DeleteFiles(tempFiles);
                                }
                            }
                        }
                        break;
                    default:
                        throw new ArgumentException("ドキュメントファイルではありません", nameof(filePath));
                }
                return result;
            }
            catch (Exception ex)
            {
                System.Console.Write(ex.Message);
                System.Console.Write(ex.StackTrace);
                LogWriter.WriteLog($"読み出しでエラー発生: {ex.Message}", LogWriter.LogLevel.ERROR);
                throw;
            }
        }

        /// <summary>
        /// Appends the text of a Word shape to <paramref name="contents"/>, descending
        /// recursively into group shapes and drawing canvases.
        /// </summary>
        /// <param name="contents">List that receives every non-empty text found.</param>
        /// <param name="shape">Word shape to inspect.</param>
        private static void ExtractShapeContents(List<string> contents, Microsoft.Office.Interop.Word.Shape shape)
        {
            // NOTE(review): why the shape has to be selected first is not visible here — confirm it is required.
            shape.Select();

            switch (shape.Type)
            {
                case Microsoft.Office.Core.MsoShapeType.msoGroup:
                    // Recurse into every member of the group.
                    foreach (Microsoft.Office.Interop.Word.Shape member in shape.GroupItems)
                    {
                        ExtractShapeContents(contents, member);
                    }
                    return;

                case Microsoft.Office.Core.MsoShapeType.msoCanvas:
                    // Recurse into every shape placed on the canvas.
                    foreach (Microsoft.Office.Interop.Word.Shape member in shape.CanvasItems)
                    {
                        ExtractShapeContents(contents, member);
                    }
                    return;
            }

            // Plain shape: keep its text when it has a text frame that holds text.
            var frame = shape.TextFrame;
            if (frame == null || frame.HasText == 0)
            {
                return;
            }

            var shapeText = frame.TextRange?.Text;
            if (string.IsNullOrEmpty(shapeText))
            {
                return;
            }

            contents.Add(shapeText);
        }

        /// <summary>
        /// Appends the text of an Excel shape to <paramref name="contents"/>, descending
        /// recursively into group shapes and drawing canvases.
        /// </summary>
        /// <param name="contents">List that receives every non-empty text found.</param>
        /// <param name="shape">Excel shape to inspect.</param>
        private static void ExtractShapesContents(List<string> contents, Microsoft.Office.Interop.Excel.Shape shape)
        {
            switch (shape.Type)
            {
                case Microsoft.Office.Core.MsoShapeType.msoGroup:
                    foreach (Microsoft.Office.Interop.Excel.Shape member in shape.GroupItems)
                    {
                        ExtractShapesContents(contents, member);
                    }
                    return;

                case Microsoft.Office.Core.MsoShapeType.msoCanvas:
                    foreach (Microsoft.Office.Interop.Excel.Shape member in shape.CanvasItems)
                    {
                        ExtractShapesContents(contents, member);
                    }
                    return;
            }

            try
            {
                var shapeText = shape.TextEffect?.Text;
                if (!string.IsNullOrEmpty(shapeText))
                {
                    contents.Add(shapeText);
                }
            }
            catch
            {
                // Best effort: any failure while reading the shape text is ignored and the shape is skipped.
            }
        }

        /// <summary>
        /// Appends the text of a PowerPoint shape to <paramref name="contents"/>, descending
        /// recursively into group shapes and drawing canvases.
        /// </summary>
        /// <param name="contents">List that receives every non-empty text found.</param>
        /// <param name="shape">PowerPoint shape to inspect.</param>
        private static void ExtractShapeContents(List<string> contents, Microsoft.Office.Interop.PowerPoint.Shape shape)
        {
            switch (shape.Type)
            {
                case Microsoft.Office.Core.MsoShapeType.msoGroup:
                    foreach (Microsoft.Office.Interop.PowerPoint.Shape member in shape.GroupItems)
                    {
                        ExtractShapeContents(contents, member);
                    }
                    return;

                case Microsoft.Office.Core.MsoShapeType.msoCanvas:
                    foreach (Microsoft.Office.Interop.PowerPoint.Shape member in shape.CanvasItems)
                    {
                        ExtractShapeContents(contents, member);
                    }
                    return;
            }

            // Plain shape: keep its text when it has a text frame that reports text.
            var frame = shape.TextFrame;
            if (frame == null || frame.HasText != Microsoft.Office.Core.MsoTriState.msoTrue)
            {
                return;
            }

            var shapeText = frame.TextRange?.Text;
            if (string.IsNullOrEmpty(shapeText))
            {
                return;
            }

            contents.Add(shapeText);
        }

        /// <summary>
        /// Reads the given temp files line by line and concatenates all lines, in file order,
        /// into a single string without any separator (line breaks are dropped).
        /// </summary>
        /// <param name="tempFiles">Paths of the files to merge; entries that do not exist are skipped.</param>
        /// <returns>The concatenated text; an empty string when nothing could be read.</returns>
        private static string MergeTextContents(string[] tempFiles)
        {
            var contents = new List<string>();
            foreach (var tempFile in tempFiles)
            {
                if (File.Exists(tempFile))
                {
                    // The temp files are written as UTF-8, so read them as UTF-8 instead of relying on
                    // BOM auto-detection to override a Shift_JIS request (which also needs the
                    // code-page provider to be registered).
                    contents.AddRange(File.ReadAllLines(tempFile, Encoding.UTF8));
                }
            }
            return string.Concat(contents);
        }

        /// <summary>Deletes every file in the list that currently exists.</summary>
        /// <param name="tempFiles">Paths of the files to delete; missing files are skipped.</param>
        private static void DeleteFiles(string[] tempFiles)
        {
            for (int index = 0; index < tempFiles.Length; index++)
            {
                string candidate = tempFiles[index];
                if (!File.Exists(candidate))
                {
                    continue;
                }

                File.Delete(candidate);
            }
        }

        /// <summary>
        /// Guesses the text encoding of a file from its leading bytes. A byte-order mark is
        /// honoured first; without one the data is tested, in this order, for 7-bit/ISO-2022-JP,
        /// UTF-8 and EUC-JP, and Shift_JIS is the fallback.
        /// </summary>
        /// <param name="firstData">Buffer holding the beginning of the file.</param>
        /// <param name="dataLength">Number of valid bytes in <paramref name="firstData"/>.</param>
        /// <returns>The encoding to use for reading the file.</returns>
        private static Encoding GetEncoding(byte[] firstData, int dataLength)
        {
            if (dataLength < 2)
            {
                return Encoding.GetEncoding("Shift_JIS");
            }

            if ((firstData[0] == 0xfe) && (firstData[1] == 0xff))
            {
                // UTF-16 BE
                return new UnicodeEncoding(true, true);
            }

            if ((firstData[0] == 0xff) && (firstData[1] == 0xfe))
            {
                // Only bytes that were actually read may decide between UTF-32 LE and UTF-16 LE;
                // checking the buffer capacity instead would let stale bytes take part.
                if ((4 <= dataLength) &&
                    (firstData[2] == 0x00) && (firstData[3] == 0x00))
                {
                    // UTF-32 LE
                    return new UTF32Encoding(false, true);
                }
                // UTF-16 LE
                return new UnicodeEncoding(false, true);
            }

            if (dataLength < 3)
            {
                return Encoding.GetEncoding("Shift_JIS");
            }

            if ((firstData[0] == 0xef) && (firstData[1] == 0xbb) && (firstData[2] == 0xbf))
            {
                // UTF-8 with BOM
                return new UTF8Encoding(true, true);
            }

            if (dataLength < 4)
            {
                return Encoding.GetEncoding("Shift_JIS");
            }

            if ((firstData[0] == 0x00) && (firstData[1] == 0x00) &&
                (firstData[2] == 0xfe) && (firstData[3] == 0xff))
            {
                // UTF-32 BE
                return new UTF32Encoding(true, true);
            }

            // No BOM: first see whether the data is purely 7-bit.
            var (hasEightBitByte, hasJisEscape) = JISEncodingJudgment(firstData, dataLength);
            if (!hasEightBitByte)
            {
                // 7-bit data is ISO-2022-JP when a JIS escape sequence was seen, plain ASCII otherwise.
                return hasJisEscape
                    ? Encoding.GetEncoding("iso-2022-jp")
                    : Encoding.GetEncoding("us-ascii");
            }

            // The judgment helpers return true when the data violates the respective encoding.
            if (!Utf8EncodingJudgment(firstData, dataLength))
            {
                // UTF-8 without BOM
                return new UTF8Encoding(true, true);
            }

            if (!EUCJPEncodingJudgment(firstData, dataLength))
            {
                return Encoding.GetEncoding("EUC-JP");
            }

            return Encoding.GetEncoding("Shift_JIS");
        }

        /// <summary>
        /// Tests whether <paramref name="data"/> starts with the byte sequence <paramref name="bom"/>.
        /// </summary>
        /// <param name="data">Bytes to inspect; must be at least as long as <paramref name="bom"/>.</param>
        /// <param name="bom">Byte sequence to look for at the start of <paramref name="data"/>.</param>
        /// <returns><c>true</c> when every byte of <paramref name="bom"/> equals the byte at the same index in <paramref name="data"/>.</returns>
        private static bool IsMatched(byte[] data, byte[] bom)
        {
            int position = 0;
            while (position < bom.Length)
            {
                if (data[position] != bom[position])
                {
                    return false;
                }
                position++;
            }

            return true;
        }

        /// <summary>
        /// Scans the buffer for 8-bit bytes and for the escape sequences ESC ( B and ESC $ B.
        /// Scanning stops at the first byte that is 0x80 or above.
        /// </summary>
        /// <param name="buffer">Bytes to inspect.</param>
        /// <param name="sizeOfBuffer">Number of bytes of <paramref name="buffer"/> to scan.</param>
        /// <returns>
        /// Item1: <c>true</c> when a byte of 0x80 or above was found.
        /// Item2: <c>true</c> when one of the two escape sequences was seen in the scanned part.
        /// </returns>
        private static (bool, bool) JISEncodingJudgment(byte[] buffer, int sizeOfBuffer)
        {
            byte[] asciiEscape = { 0x1B, 0x28, 0x42 };   // ESC ( B
            byte[] kanjiEscape = { 0x1B, 0x24, 0x42 };   // ESC $ B
            byte[] window = new byte[3];                 // the last three 7-bit bytes seen
            bool escapeSeen = false;

            for (int index = 0; index < sizeOfBuffer; index++)
            {
                byte current = buffer[index];
                if (current >= 0x80)
                {
                    return (true, escapeSeen);
                }

                // Slide the three-byte window forward by one byte.
                window[0] = window[1];
                window[1] = window[2];
                window[2] = current;

                if (!escapeSeen)
                {
                    escapeSeen = IsMatched(window, asciiEscape) || IsMatched(window, kanjiEscape);
                }
            }

            return (false, escapeSeen);
        }

        /// <summary>
        /// Checks whether the buffer violates the UTF-8 byte structure (lead byte followed by
        /// the right number of continuation bytes). A sequence cut off by the end of the scanned
        /// range is tolerated, because the buffer may hold only the beginning of a file.
        /// </summary>
        /// <param name="buffer">Bytes to inspect.</param>
        /// <param name="sizeOfBuffer">Number of bytes of <paramref name="buffer"/> to scan.</param>
        /// <returns><c>true</c> when the data is NOT structurally valid UTF-8; <c>false</c> when no violation was found.</returns>
        private static bool Utf8EncodingJudgment(byte[] buffer, int sizeOfBuffer)
        {
            const uint TwoByteLead = 0b11000000;
            const uint ThreeByteLead = 0b11100000;
            const uint FourByteLead = 0b11110000;

            uint currentLead = 0;   // masked lead byte of the sequence in progress (0 = none)
            int bytesSeen = 0;      // bytes of that sequence seen so far, lead byte included

            for (int i = 0; i < sizeOfBuffer; i++)
            {
                uint current = buffer[i];

                if (current < 0x80)
                {
                    // A 7-bit character must not interrupt an unfinished multi-byte sequence.
                    if (Utf8OutOfSpecification(currentLead, bytesSeen, false))
                    {
                        return true;
                    }

                    currentLead = 0;
                    bytesSeen = 0;
                    continue;
                }

                // Classify a lead byte by its high bits.
                uint newLead = 0;
                if ((current & 0b11100000) == TwoByteLead)
                {
                    newLead = TwoByteLead;
                }
                else if ((current & 0b11110000) == ThreeByteLead)
                {
                    newLead = ThreeByteLead;
                }
                else if ((current & 0b11111000) == FourByteLead)
                {
                    newLead = FourByteLead;
                }

                if (newLead != 0)
                {
                    // The previous sequence must be complete before a new one starts.
                    if (Utf8OutOfSpecification(currentLead, bytesSeen, false))
                    {
                        return true;
                    }

                    currentLead = newLead;
                    bytesSeen = 1;
                    continue;
                }

                // Continuation byte (10xxxxxx).
                if ((current & 0b11000000) == 0b10000000)
                {
                    // A continuation byte cannot start a character.
                    if (bytesSeen < 1)
                    {
                        return true;
                    }

                    bytesSeen++;

                    // More continuation bytes than the lead byte allows.
                    if (Utf8OutOfSpecification(currentLead, bytesSeen, true))
                    {
                        return true;
                    }

                    continue;
                }

                // Matches none of the UTF-8 byte patterns.
                return true;
            }

            return false;
        }

        /// <summary>
        /// Compares the number of bytes seen for a UTF-8 sequence with the length its lead byte demands.
        /// </summary>
        /// <param name="topByteChar">Masked lead byte: 0b11000000 (2 bytes), 0b11100000 (3 bytes) or 0b11110000 (4 bytes); any other value never reports a violation.</param>
        /// <param name="byteCharCount">Bytes seen so far for the sequence, lead byte included.</param>
        /// <param name="checkBig"><c>true</c> to test for too many bytes, <c>false</c> to test for too few.</param>
        /// <returns><c>true</c> when the count violates the expected length.</returns>
        private static bool Utf8OutOfSpecification(uint topByteChar, int byteCharCount, bool checkBig)
        {
            int expectedLength;
            if (topByteChar == 0b11000000)
            {
                expectedLength = 2;
            }
            else if (topByteChar == 0b11100000)
            {
                expectedLength = 3;
            }
            else if (topByteChar == 0b11110000)
            {
                expectedLength = 4;
            }
            else
            {
                return false;
            }

            return checkBig
                ? byteCharCount > expectedLength
                : byteCharCount < expectedLength;
        }

        /// <summary>Kind of character tracked while scanning EUC-JP data.</summary>
        private enum BYTECODE : byte { OneByteCode, TwoByteCode, KanaOneByte }

        /// <summary>
        /// Checks whether the buffer violates the EUC-JP byte structure:
        /// 0x00-0x7F single bytes, 0xA1-0xFE lead byte followed by a 0xA1-0xFE trail byte, and
        /// 0x8E followed by a 0xA1-0xDF half-width katakana byte. Every other byte, including
        /// the 0x8F three-byte prefix, is treated as a violation, as before.
        /// A character cut off by the end of the scanned range is tolerated, because the buffer
        /// may hold only the beginning of a file.
        /// </summary>
        /// <param name="buffer">Bytes to inspect.</param>
        /// <param name="sizeOfBuffer">Number of bytes of <paramref name="buffer"/> to scan.</param>
        /// <returns><c>true</c> when the data is NOT structurally valid EUC-JP; <c>false</c> when no violation was found.</returns>
        private static bool EUCJPEncodingJudgment(byte[] buffer, int sizeOfBuffer)
        {
            // Kind of trail byte still owed by the character in progress;
            // OneByteCode means that no multi-byte character is open.
            BYTECODE pending = BYTECODE.OneByteCode;

            for (int i = 0; i < sizeOfBuffer; i++)
            {
                byte current = buffer[i];

                switch (pending)
                {
                    case BYTECODE.TwoByteCode:
                        // Second byte of a two-byte character.
                        if (current < 0xA1 || current > 0xFE)
                        {
                            return true;
                        }
                        pending = BYTECODE.OneByteCode;
                        continue;

                    case BYTECODE.KanaOneByte:
                        // Half-width katakana byte that follows 0x8E.
                        if (current < 0xA1 || current > 0xDF)
                        {
                            return true;
                        }
                        pending = BYTECODE.OneByteCode;
                        continue;
                }

                // Start of a new character.
                if (current <= 0x7F)
                {
                    continue;
                }

                if (current == 0x8E)
                {
                    pending = BYTECODE.KanaOneByte;
                }
                else if (0xA1 <= current && current <= 0xFE)
                {
                    pending = BYTECODE.TwoByteCode;
                }
                else
                {
                    // Not a byte that can start an EUC-JP character.
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Cleanses a text: removes every special symbol listed in the CSV file and then
        /// passes the text through NormalizeText.
        /// </summary>
        /// <param name="fileText">Text to cleanse.</param>
        /// <param name="csvFilePath">Path of the CSV file that lists the symbols to remove.</param>
        /// <returns>
        /// The cleansed text. When the CSV yields no symbols the text is returned unchanged;
        /// when an error occurs it is logged and the text as processed up to that point is returned.
        /// </returns>
        public static string? CleansingText(string fileText, string csvFilePath)
        {
            try
            {
                // Load the symbols to strip from the CSV file.
                System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
                List<string> symbols = LoadSpecialCharacters(csvFilePath);

                // Nothing to strip: hand the text back untouched.
                if (symbols.Count <= 0)
                {
                    return fileText;
                }

                // Remove the symbols one at a time, skipping blank entries.
                foreach (var symbol in symbols)
                {
                    if (string.IsNullOrEmpty(symbol))
                    {
                        continue;
                    }

                    fileText = fileText.Replace(symbol, string.Empty);
                }

                // Unify character forms (per the original note: half-width digits, lower-case
                // letters, full-width kana/kanji — NormalizeText itself is not visible here).
                fileText = NormalizeText(fileText);
                return fileText;
            }
            catch (Exception ex)
            {
                LogWriter.WriteLog($"クレンジング処理中にエラーが発生しました: {ex.Message}", LogWriter.LogLevel.ERROR);
                Console.WriteLine($"エラーが発生しました: {ex.Message}");

                // On error, return the text in whatever state it had reached.
                return fileText;
            }
        }

        /// <summary>
        /// Loads the list of symbols to strip from the first line of a comma-separated file.
        /// </summary>
        /// <param name="csvFilePath">Path of the CSV file; it is opened with the "shift_jis" encoding.</param>
        /// <returns>
        /// The trimmed comma-separated entries of the first line (blank entries included).
        /// The list is empty when the file has no content, the first line is blank,
        /// or reading fails (the failure is logged, not rethrown).
        /// </returns>
        private static List<string> LoadSpecialCharacters(string csvFilePath)
        {
            var symbols = new List<string>();

            try
            {
                using var csvReader = new StreamReader(csvFilePath, System.Text.Encoding.GetEncoding("shift_jis"));

                // Only the first line is consulted; ReadLine returns null when nothing is left to read.
                string? firstLine = csvReader.ReadLine();
                string entriesLine = firstLine?.Trim() ?? string.Empty;

                if (entriesLine.Length > 0)
                {
                    // Split on commas and keep each column with surrounding whitespace removed.
                    symbols.AddRange(Array.ConvertAll(entriesLine.Split(','), entry => entry.Trim()));
                }
            }
            catch (Exception ex)
            {
                LogWriter.WriteLog($"CSVファイルの読み込みでエラーが発生しました: {ex.Message}", LogWriter.LogLevel.ERROR);
                Console.WriteLine($"CSVファイルの読み込みエラー: {ex.Message}");
            }

            return symbols;
        }

        /// <summary>
        /// Runs the character-normalization pipeline over a string.
        /// </summary>
        /// <remarks>
        /// The stages run in this order: NormalizeKatakana (katakana width),
        /// NormalizeRomanNumerals (Roman numerals to Arabic digits), and
        /// NormalizeAlphabetAndNumbers (lower-casing plus half-width letters and digits).
        /// </remarks>
        /// <param name="inputText">Text to normalize.</param>
        /// <returns>The normalized text.</returns>
        private static string NormalizeText(string inputText) =>
            NormalizeAlphabetAndNumbers(
                NormalizeRomanNumerals(
                    NormalizeKatakana(inputText)));

        /// <summary>
        /// Converts half-width katakana (U+FF66–U+FF9D) to full-width katakana.
        /// </summary>
        /// <remarks>
        /// A half-width kana immediately followed by a half-width voiced mark (ﾞ) or
        /// semi-voiced mark (ﾟ) is composed into the single full-width voiced form
        /// (for example ｶﾞ becomes ガ and ﾊﾟ becomes パ). A mark that cannot be composed
        /// with the preceding kana is left in the text unchanged, as is every other character.
        /// </remarks>
        /// <param name="inputText">Text to convert.</param>
        /// <returns>The text with half-width katakana replaced by full-width katakana.</returns>
        private static string NormalizeKatakana(string inputText)
        {
            const char HalfWidthFirst = 'ｦ'; // U+FF66
            const char HalfWidthLast = 'ﾝ';  // U+FF9D
            const char HalfWidthVoicedMark = 'ﾞ';
            const char HalfWidthSemiVoicedMark = 'ﾟ';

            // Full-width counterparts listed in the code-point order of U+FF66..U+FF9D,
            // so (halfWidthChar - HalfWidthFirst) is the index of the replacement.
            const string FullWidthKana =
                "ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン";

            // Kana that have a voiced / semi-voiced form, position-aligned with that form.
            const string VoicedBases = "カキクケコサシスセソタチツテトハヒフヘホウ";
            const string VoicedForms = "ガギグゲゴザジズゼゾダヂヅデドバビブベボヴ";
            const string SemiVoicedBases = "ハヒフヘホ";
            const string SemiVoicedForms = "パピプペポ";

            var converted = new StringBuilder(inputText.Length);

            for (int i = 0; i < inputText.Length; i++)
            {
                char current = inputText[i];

                if (current < HalfWidthFirst || current > HalfWidthLast)
                {
                    // Not a half-width katakana letter: copy through untouched.
                    converted.Append(current);
                    continue;
                }

                char fullWidth = FullWidthKana[current - HalfWidthFirst];

                // Look ahead for a sound mark that can be merged into this kana.
                if (i + 1 < inputText.Length)
                {
                    char next = inputText[i + 1];
                    int composedIndex = -1;
                    string composedForms = VoicedForms;

                    if (next == HalfWidthVoicedMark)
                    {
                        composedIndex = VoicedBases.IndexOf(fullWidth);
                    }
                    else if (next == HalfWidthSemiVoicedMark)
                    {
                        composedIndex = SemiVoicedBases.IndexOf(fullWidth);
                        composedForms = SemiVoicedForms;
                    }

                    if (composedIndex >= 0)
                    {
                        fullWidth = composedForms[composedIndex];
                        i++; // the mark has been consumed
                    }
                }

                converted.Append(fullWidth);
            }

            return converted.ToString();
        }

        /// <summary>
        /// Matches one or more consecutive Unicode Roman-numeral characters
        /// (upper case Ⅰ–Ⅿ and lower case ⅰ–ⅿ).
        /// </summary>
        private static readonly Regex RomanNumeralRun =
            new Regex(@"[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫⅬⅭⅮⅯⅰⅱⅲⅳⅴⅵⅶⅷⅸⅹⅺⅻⅼⅽⅾⅿ]+");

        /// <summary>
        /// Numeric value of each Unicode Roman-numeral character. Built once and shared,
        /// instead of being rebuilt for every regex match.
        /// </summary>
        private static readonly Dictionary<char, int> RomanNumeralValues = new Dictionary<char, int>
        {
            { 'Ⅰ', 1 }, { 'Ⅱ', 2 }, { 'Ⅲ', 3 }, { 'Ⅳ', 4 }, { 'Ⅴ', 5 },
            { 'Ⅵ', 6 }, { 'Ⅶ', 7 }, { 'Ⅷ', 8 }, { 'Ⅸ', 9 }, { 'Ⅹ', 10 },
            { 'Ⅺ', 11 }, { 'Ⅻ', 12 }, { 'Ⅼ', 50 }, { 'Ⅽ', 100 }, { 'Ⅾ', 500 }, { 'Ⅿ', 1000 },
            { 'ⅰ', 1 }, { 'ⅱ', 2 }, { 'ⅲ', 3 }, { 'ⅳ', 4 }, { 'ⅴ', 5 },
            { 'ⅵ', 6 }, { 'ⅶ', 7 }, { 'ⅷ', 8 }, { 'ⅸ', 9 }, { 'ⅹ', 10 },
            { 'ⅺ', 11 }, { 'ⅻ', 12 }, { 'ⅼ', 50 }, { 'ⅽ', 100 }, { 'ⅾ', 500 }, { 'ⅿ', 1000 }
        };

        /// <summary>
        /// Replaces every run of Unicode Roman-numeral characters with its Arabic value.
        /// </summary>
        /// <param name="inputText">Text to scan.</param>
        /// <returns>The text with each Roman-numeral run replaced by decimal digits.</returns>
        private static string NormalizeRomanNumerals(string inputText)
        {
            return RomanNumeralRun.Replace(inputText, match => ConvertRomanToArabic(match.Value));
        }

        /// <summary>
        /// Evaluates a run of Unicode Roman-numeral characters as a single number.
        /// </summary>
        /// <remarks>
        /// The run is read right to left: a character whose value is smaller than that of
        /// its right-hand neighbour is subtracted, otherwise it is added. Characters with
        /// no known value are ignored.
        /// NOTE(review): because precomposed numerals are compared as whole values, an
        /// enumeration such as "ⅠⅡⅢ" evaluates to 0 (3 - 2 - 1) — confirm whether adjacent
        /// numerals should instead be treated as separate numbers.
        /// </remarks>
        /// <param name="roman">Run of Roman-numeral characters.</param>
        /// <returns>The decimal representation of the computed value.</returns>
        private static string ConvertRomanToArabic(string roman)
        {
            int total = 0;
            int valueToTheRight = 0;

            for (int i = roman.Length - 1; i >= 0; i--)
            {
                if (!RomanNumeralValues.TryGetValue(roman[i], out int value))
                {
                    continue;
                }

                // Subtractive notation: a smaller value standing left of a larger one is deducted.
                total += value < valueToTheRight ? -value : value;
                valueToTheRight = value;
            }

            // Invariant culture keeps the output independent of the machine's regional settings.
            return total.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Lower-cases the text and converts full-width letters and digits to half-width.
        /// </summary>
        /// <param name="inputText">Text to convert.</param>
        /// <returns>The text with lower-case half-width letters and half-width digits.</returns>
        private static string NormalizeAlphabetAndNumbers(string inputText)
        {
            // Culture-invariant casing: ToLower() follows the current culture, so the same
            // input could normalize differently depending on the machine's regional
            // settings (e.g. "I" becomes a dotless "ı" under Turkish culture).
            string result = inputText.ToLowerInvariant();

            // Full-width letters and digits -> half-width.
            result = ToHalfWidth(result);

            // Second digit pass kept as a safety net; ToHalfWidth already maps full-width digits.
            result = NormalizeNumbers(result);

            return result;
        }

        /// <summary>
        /// Converts full-width digits (０–９) to their half-width equivalents (0–9).
        /// </summary>
        /// <param name="inputText">Text to convert.</param>
        /// <returns>The text with every full-width digit replaced by a half-width digit.</returns>
        private static string NormalizeNumbers(string inputText)
        {
            string converted = inputText;

            // Full-width and half-width digits are both contiguous, so the pair for
            // each digit value is found by offsetting from the respective zero.
            for (int digit = 0; digit <= 9; digit++)
            {
                converted = converted.Replace((char)('０' + digit), (char)('0' + digit));
            }

            return converted;
        }

        /// <summary>
        /// Converts full-width Latin letters (Ａ–Ｚ, ａ–ｚ) and digits (０–９) to half-width.
        /// </summary>
        /// <param name="inputText">Text to convert.</param>
        /// <returns>The text with full-width letters and digits replaced; all other characters are kept.</returns>
        private static string ToHalfWidth(string inputText)
        {
            // The three full-width ranges sit at the same distance from their ASCII counterparts.
            const int FullWidthOffset = 'Ａ' - 'A';

            var narrowed = new StringBuilder(inputText.Length);

            foreach (char ch in inputText)
            {
                bool isFullWidthAlphanumeric =
                    ch is (>= 'Ａ' and <= 'Ｚ') or (>= 'ａ' and <= 'ｚ') or (>= '０' and <= '９');

                narrowed.Append(isFullWidthAlphanumeric ? (char)(ch - FullWidthOffset) : ch);
            }

            return narrowed.ToString();
        }
    }
}