Skip to content

Commit

Permalink
Merge pull request #141 from mtgto/euc-jis-2004-data-extension
Browse files Browse the repository at this point in the history
EUC-JISX0213の文字列の読み込みをDataから行うようにする
  • Loading branch information
mtgto authored Apr 5, 2024
2 parents 3b26e2c + 6deafa2 commit 6b9aa11
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 63 deletions.
12 changes: 8 additions & 4 deletions macSKK.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
CE496C8C2B43968A001C623C /* LogView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CE496C8B2B43968A001C623C /* LogView.swift */; };
CE496C912B440892001C623C /* URL+Additions.swift in Sources */ = {isa = PBXBuildFile; fileRef = CE496C902B440892001C623C /* URL+Additions.swift */; };
CE496C932B440B9B001C623C /* euc-jis-2004.txt in Resources */ = {isa = PBXBuildFile; fileRef = CE496C922B440B9B001C623C /* euc-jis-2004.txt */; };
CE496C952B440BBD001C623C /* URL+EucJis2004Tests.swift in Sources */ = {isa = PBXBuildFile; fileRef = CE496C942B440BBD001C623C /* URL+EucJis2004Tests.swift */; };
CE496C952B440BBD001C623C /* Data+EucJis2004Tests.swift in Sources */ = {isa = PBXBuildFile; fileRef = CE496C942B440BBD001C623C /* Data+EucJis2004Tests.swift */; };
CE496C982B440CDA001C623C /* libiconv.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = CE496C972B440CCA001C623C /* libiconv.tbd */; };
CE4CB5CC2AD557D90046FA34 /* NumberEntry.swift in Sources */ = {isa = PBXBuildFile; fileRef = CE4CB5CB2AD557D90046FA34 /* NumberEntry.swift */; };
CE4CB5CE2AD55DF90046FA34 /* NumberEntryTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = CE4CB5CD2AD55DF90046FA34 /* NumberEntryTests.swift */; };
Expand Down Expand Up @@ -84,6 +84,7 @@
CED7CA592A83CD67004EF988 /* releases.atom in Resources */ = {isa = PBXBuildFile; fileRef = CED7CA582A83CD67004EF988 /* releases.atom */; };
CED7CA5B2A83DE7F004EF988 /* SettingsViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = CED7CA5A2A83DE7F004EF988 /* SettingsViewModel.swift */; };
CED7F51F2AB5F4A7007FC6BD /* Character+Additions.swift in Sources */ = {isa = PBXBuildFile; fileRef = CED7F51E2AB5F4A7007FC6BD /* Character+Additions.swift */; };
CED987412BB953E7001B40F9 /* Data+EucJis2004.swift in Sources */ = {isa = PBXBuildFile; fileRef = CED987402BB953E7001B40F9 /* Data+EucJis2004.swift */; };
CEE2D9772A99FE1B00A4CD76 /* Word.swift in Sources */ = {isa = PBXBuildFile; fileRef = CEE2D9762A99FE1B00A4CD76 /* Word.swift */; };
CEE2D9792A99FEC700A4CD76 /* CandidateTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = CEE2D9782A99FEC700A4CD76 /* CandidateTest.swift */; };
CEE3717529653112000DB2C3 /* SoftwareUpdateView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CEE3717429653112000DB2C3 /* SoftwareUpdateView.swift */; };
Expand Down Expand Up @@ -144,7 +145,7 @@
CE496C8B2B43968A001C623C /* LogView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LogView.swift; sourceTree = "<group>"; };
CE496C902B440892001C623C /* URL+Additions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "URL+Additions.swift"; sourceTree = "<group>"; };
CE496C922B440B9B001C623C /* euc-jis-2004.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "euc-jis-2004.txt"; sourceTree = "<group>"; };
CE496C942B440BBD001C623C /* URL+EucJis2004Tests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "URL+EucJis2004Tests.swift"; sourceTree = "<group>"; };
CE496C942B440BBD001C623C /* Data+EucJis2004Tests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Data+EucJis2004Tests.swift"; sourceTree = "<group>"; };
CE496C972B440CCA001C623C /* libiconv.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libiconv.tbd; path = usr/lib/libiconv.tbd; sourceTree = SDKROOT; };
CE4CB5CB2AD557D90046FA34 /* NumberEntry.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NumberEntry.swift; sourceTree = "<group>"; };
CE4CB5CD2AD55DF90046FA34 /* NumberEntryTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NumberEntryTests.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -211,6 +212,7 @@
CED7CA582A83CD67004EF988 /* releases.atom */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = releases.atom; sourceTree = "<group>"; };
CED7CA5A2A83DE7F004EF988 /* SettingsViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsViewModel.swift; sourceTree = "<group>"; };
CED7F51E2AB5F4A7007FC6BD /* Character+Additions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Character+Additions.swift"; sourceTree = "<group>"; };
CED987402BB953E7001B40F9 /* Data+EucJis2004.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Data+EucJis2004.swift"; sourceTree = "<group>"; };
CEE2D9762A99FE1B00A4CD76 /* Word.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Word.swift; sourceTree = "<group>"; };
CEE2D9782A99FEC700A4CD76 /* CandidateTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CandidateTest.swift; sourceTree = "<group>"; };
CEE3717429653112000DB2C3 /* SoftwareUpdateView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftwareUpdateView.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -313,6 +315,7 @@
CEA78FAB2960401F00B67E25 /* String+Transform.swift */,
CED7F51E2AB5F4A7007FC6BD /* Character+Additions.swift */,
CE496C902B440892001C623C /* URL+Additions.swift */,
CED987402BB953E7001B40F9 /* Data+EucJis2004.swift */,
CE485A892A8FA5C6008271EF /* UserNotificationDelegate.swift */,
CE7F9ADA2AB53E31001B1877 /* View */,
CEC376E929651DE000D9C432 /* Settings */,
Expand Down Expand Up @@ -354,7 +357,7 @@
CED7CA3D2A8397E4004EF988 /* UpdateCheckerTests.swift */,
CE6DBA902A846C1700F5A227 /* ReleaseVersionTests.swift */,
CEA78FAD2961BA1D00B67E25 /* String+TransformTests.swift */,
CE496C942B440BBD001C623C /* URL+EucJis2004Tests.swift */,
CE496C942B440BBD001C623C /* Data+EucJis2004Tests.swift */,
CE06CA332AAC199500E80E5E /* UserDict+Utilities.swift */,
CED7CA572A83BFE9004EF988 /* fixture */,
);
Expand Down Expand Up @@ -623,6 +626,7 @@
CE4CB5CC2AD557D90046FA34 /* NumberEntry.swift in Sources */,
CEF3D86C2B9C022900BD1D3A /* WorkaroundApplicationView.swift in Sources */,
CE84A3DE29571797009394C4 /* Action.swift in Sources */,
CED987412BB953E7001B40F9 /* Data+EucJis2004.swift in Sources */,
CE485A882A8FA195008271EF /* Release+UNNotification.swift in Sources */,
CED7CA3A2A839505004EF988 /* FetchUpdateServiceProtocol.swift in Sources */,
CEA78FB02964209B00B67E25 /* UserDict.swift in Sources */,
Expand Down Expand Up @@ -659,7 +663,7 @@
CE4CB5CE2AD55DF90046FA34 /* NumberEntryTests.swift in Sources */,
CEADA44D2B025A8A0026E2BD /* EntryTests.swift in Sources */,
CEA78FAA295EBCAC00B67E25 /* StateMachineTests.swift in Sources */,
CE496C952B440BBD001C623C /* URL+EucJis2004Tests.swift in Sources */,
CE496C952B440BBD001C623C /* Data+EucJis2004Tests.swift in Sources */,
CEE2D9792A99FEC700A4CD76 /* CandidateTest.swift in Sources */,
CEF0823629685C0800646366 /* StateTests.swift in Sources */,
CED7CA3E2A8397E4004EF988 /* UpdateCheckerTests.swift in Sources */,
Expand Down
60 changes: 60 additions & 0 deletions macSKK/Data+EucJis2004.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: 2024 mtgto <[email protected]>
// SPDX-License-Identifier: GPL-3.0-or-later

import Foundation

/// Errors raised while decoding EUC-JIS-2004 (EUC-JISX0213) text.
///
/// - `unsupported`: the iconv converter for EUC-JISX0213 to UTF-8 could not be opened.
/// - `convert`: the conversion failed, or its output was not valid UTF-8.
enum EucJis2004Error: Error {
    case unsupported, convert
}

extension Data {
    /// Decodes this data as EUC-JISX0213 (an extension of EUC-JP) into a `String` using libiconv.
    ///
    /// The output is collected chunk by chunk and decoded by length, so the result
    /// does not depend on a fixed output-size estimate or on a trailing NUL byte.
    ///
    /// - Returns: The decoded string. An empty string is returned for empty data.
    /// - Throws: `EucJis2004Error.unsupported` when the iconv converter cannot be opened,
    ///   `EucJis2004Error.convert` when iconv reports an error or the converted
    ///   bytes are not valid UTF-8.
    func eucJis2004String() throws -> String {
        guard !isEmpty else {
            return ""
        }
        let cd = iconv_open("UTF-8", "EUC-JISX0213")
        if cd == iconv_t(bitPattern: -1) {
            logger.error("iconvの初期化に失敗しました")
            throw EucJis2004Error.unsupported
        }
        defer {
            if iconv_close(cd) == -1 {
                logger.error("iconv変換ディスクリプタの解放に失敗しました: \(errno)")
            }
        }

        // iconv takes a mutable input pointer, so work on a mutable copy.
        var input = self
        var inLeft = input.count
        // Twice the input size is enough for typical text, so the common case finishes
        // in one iconv call. It is only an estimate: a 2-byte EUC-JISX0213 code can expand
        // to a 4-byte UTF-8 sequence or to a base character plus a combining mark, so the
        // loop below drains the chunk and continues whenever iconv reports E2BIG.
        var chunk = [CChar](repeating: 0, count: Swift.max(input.count * 2, 64))
        var utf8Bytes: [UInt8] = []
        utf8Bytes.reserveCapacity(chunk.count)
        var irreversibleCount = 0

        try input.withUnsafeMutableBytes { (rawInput: UnsafeMutableRawBufferPointer) throws -> Void in
            var inPtr = rawInput.baseAddress?.assumingMemoryBound(to: CChar.self)
            try chunk.withUnsafeMutableBufferPointer { (out: inout UnsafeMutableBufferPointer<CChar>) throws -> Void in
                while true {
                    var outPtr = out.baseAddress
                    var outLeft = out.count
                    let ret = iconv(cd, &inPtr, &inLeft, &outPtr, &outLeft)
                    // Capture errno immediately, before any other call can overwrite it.
                    let errorNumber = errno
                    let written = out.count - outLeft
                    for index in 0..<written {
                        utf8Bytes.append(UInt8(bitPattern: out[index]))
                    }
                    if ret == -1 {
                        if errorNumber == E2BIG && written > 0 {
                            // The chunk filled up but progress was made: reuse it for the rest.
                            continue
                        }
                        switch errorNumber {
                        case EBADF:
                            logger.error("iconv変換ディスクリプタの状態が異常です")
                        case EILSEQ:
                            logger.error("入力に不正なバイト列が存在します")
                        case E2BIG:
                            logger.error("EUC-JIS-2004からの変換先のバッファが足りません")
                        case EINVAL:
                            logger.error("入力文字列が終端していません")
                        default:
                            break
                        }
                        throw EucJis2004Error.convert
                    }
                    irreversibleCount += ret
                    break
                }
            }
        }

        if irreversibleCount > 0 {
            logger.warning("EUC-JIS-2004から処理できない文字が \(irreversibleCount) 文字ありました")
        }
        // Decode by explicit length: no NUL terminator is required, and an embedded NUL
        // does not truncate the result.
        guard let str = String(bytes: utf8Bytes, encoding: .utf8) else {
            throw EucJis2004Error.convert
        }
        return str
    }
}
7 changes: 2 additions & 5 deletions macSKK/FileDict.swift
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,9 @@ class FileDict: NSObject, DictProtocol, Identifiable {

private func loadString(_ url: URL) throws -> String {
if encoding == .japaneseEUC {
let data = try Data(contentsOf: url)
return try data.eucJis2004String()
// JIS X 0213 を使ったEUC-JIS-2004の場合があるため失敗したらiconvでUTF-8に変換する
do {
return try String(contentsOf: url, encoding: .japaneseEUC)
} catch {
return try url.eucJis2004String()
}
} else if encoding == .utf8 {
let data = try Data(contentsOf: url)
// UTF-8 BOMがついているか検査
Expand Down
51 changes: 0 additions & 51 deletions macSKK/URL+Additions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,7 @@

import Foundation

enum EucJis2004Error: Error {
case unsupported
case convert
}

extension URL {
// EUC-JIS-2004でエンコードされているファイルからStringに読み込む
func eucJis2004String() throws -> String {
var data = try Data(contentsOf: self)
if data.isEmpty {
return ""
}
let cd = iconv_open("UTF-8".cString(using: .ascii), "EUC-JISX0213".cString(using: .ascii))
if cd == iconv_t(bitPattern: -1) {
throw EucJis2004Error.unsupported
}
defer {
if iconv_close(cd) == -1 {
logger.error("iconv変換ディスクリプタの解放に失敗しました: \(errno)")
}
}
var inLeft = data.count
// EUC-JIS-2004は1文字で1..2バイト (ASCIIは1バイト)、UTF-8は1..4バイト (ASCIIは1バイト) なのでバッファサイズは2倍用意する
var outLeft = data.count * 2
var buffer = Array<CChar>(repeating: 0, count: outLeft)
return try data.withUnsafeMutableBytes {
var inPtr = $0.baseAddress?.assumingMemoryBound(to: CChar.self)
try buffer.withUnsafeMutableBufferPointer {
var outPtr = $0.baseAddress
let ret = iconv(cd, &inPtr, &inLeft, &outPtr, &outLeft)
if ret == -1 {
if errno == EBADF {
logger.error("iconv変換ディスクリプタの状態が異常です")
} else if errno == EILSEQ {
logger.error("入力に不正なバイト列が存在します")
} else if errno == E2BIG {
logger.error("EUC-JIS-2004からの変換先のバッファが足りません")
} else if errno == EINVAL {
logger.error("入力文字列が終端していません")
}
throw EucJis2004Error.convert
} else if ret > 0 {
logger.warning("EUC-JIS-2004から処理できない文字が \(ret) 文字ありました")
}
}
guard let str = String(validatingUTF8: buffer) else {
throw EucJis2004Error.convert
}
return str
}
}

/**
* 読み込み可能なファイルかどうかを返す
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ import XCTest
final class URLEucJis2004Tests: XCTestCase {
func testLoad() throws {
let fileURL = Bundle(for: Self.self).url(forResource: "euc-jis-2004", withExtension: "txt")!
XCTAssertEqual(try fileURL.eucJis2004String(), "川﨑")
let data = try Data(contentsOf: fileURL)
XCTAssertEqual(try data.eucJis2004String(), "川﨑")
}

func testLoadFail() throws {
let fileURL = Bundle(for: Self.self).url(forResource: "SKK-JISYO.test", withExtension: "utf8")!
XCTAssertThrowsError(try fileURL.eucJis2004String()) {
let data = try Data(contentsOf: fileURL)
XCTAssertThrowsError(try data.eucJis2004String()) {
XCTAssertEqual($0 as! EucJis2004Error, EucJis2004Error.convert)
}
}

func testLoadEmpty() throws {
let fileURL = Bundle(for: Self.self).url(forResource: "empty", withExtension: "txt")!
XCTAssertEqual(try fileURL.eucJis2004String(), "")
let data = try Data(contentsOf: fileURL)
XCTAssertEqual(try data.eucJis2004String(), "")
}
}

0 comments on commit 6b9aa11

Please sign in to comment.