Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 928cdf7

Browse files
committed
Add concurrentMap
1 parent a5e1b1d commit 928cdf7

File tree

1 file changed

+21
-7
lines changed

1 file changed

+21
-7
lines changed

Datasets/TextUnsupervised/TextUnsupervised.swift

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,23 @@ import Foundation
1717
import ModelSupport
1818
import TensorFlow
1919

20+
extension Array {
  /// Returns the results of applying `transform` to every element, computed concurrently.
  ///
  /// The indices of the array are divided between at most `maxThreadCount` workers that are run
  /// by `DispatchQueue.concurrentPerform`; worker `w` processes the indices `w`,
  /// `w + workerCount`, `w + 2 * workerCount`, and so on. The call blocks until every worker has
  /// finished.
  ///
  /// - Parameters:
  ///   - maxThreadCount: Upper bound on the number of concurrent workers. Values below 1 are
  ///     treated as 1. Defaults to 10.
  ///   - transform: The mapping closure. It is invoked from several workers at the same time, so
  ///     it must be safe to call concurrently.
  /// - Returns: The transformed elements, in the same order as the elements of `self`.
  func concurrentMap<B>(
    maxThreadCount: Int = 10,
    _ transform: @escaping (Element) -> B
  ) -> [B] {
    // Nothing to map: skip creating the queue and dispatching zero iterations.
    guard !isEmpty else { return [] }

    let elementCount = count
    // Never use more workers than elements, and always use at least one so the stride is valid.
    let workerCount = Swift.max(1, Swift.min(elementCount, maxThreadCount))
    var results = [B?](repeating: nil, count: elementCount)
    // Serial queue whose only job is to serialize the writes into `results`.
    let writeQueue = DispatchQueue(label: "sync queue")

    DispatchQueue.concurrentPerform(iterations: workerCount) { worker in
      for index in stride(from: worker, to: elementCount, by: workerCount) {
        // Run the transform outside the serial queue so the workers only contend on the store.
        let transformed = transform(self[index])
        writeQueue.sync {
          results[index] = transformed
        }
      }
    }

    // Each index in 0..<elementCount belongs to exactly one worker's stride, so every slot has
    // been filled by the time `concurrentPerform` returns.
    return results.map { $0! }
  }
}
36+
2037
public enum TextUnsupervisedVariant: String {
2138
/// - Source: [Einstein AI WikiText-103](
2239
/// https://blog.einstein.ai/
@@ -174,14 +191,11 @@ public struct TextUnsupervised {
174191
let path = directory.appendingPathComponent("\(variantDetails.filename)/\(name).csv")
175192
let documentsFull = try readCSV(in: path)
176193
let documents = Array(documentsFull[0..<min(documentCount, documentsFull.count)])
177-
encodedDocs = documents.map { embedding(for: $0, bpe: bpe) }
178-
} else {
194+
encodedDocs = documents.concurrentMap { embedding(for: $0, bpe: bpe) }
195+
} else {
179196
let pathPrefix = directory.appendingPathComponent("\(variantDetails.encodedFileName!)/\(name)").path
180-
for i in 0..<documentCount {
181-
encodedDocs += [
182-
try readEncoded(in: URL(fileURLWithPath: "\(pathPrefix)/doc_\(i).txt"))
183-
]
184-
}
197+
encodedDocs = (0..<documentCount).map {URL(fileURLWithPath: "\(pathPrefix)/doc_\($0).txt")}
198+
.concurrentMap {try! readEncoded(in: $0)}
185199
}
186200

187201
return LanguageModelDataset(

0 commit comments

Comments
 (0)