Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/// Returns the Jaccard similarity of `str1` and `str2`, scaled by 65536 and
/// truncated toward zero to an integer.
func solution(_ str1: String, _ str2: String) -> Int {
    let scale: Double = 65536
    let similarity = jaccard(str1, str2)
    let scaled = similarity * scale
    return Int(scaled)
}

/// Computes the multiset Jaccard similarity of two strings.
///
/// Both strings are turned into bigram lists by `makeSet`; duplicates are
/// significant, so the lists are treated as multisets.
///
/// - Parameters:
///   - s1: First input string.
///   - s2: Second input string.
/// - Returns: `|A ∩ B| / |A ∪ B|` for the two bigram multisets, or `1.0`
///   when both multisets are empty.
func jaccard(_ s1: String, _ s2: String) -> Double {
    let bigrams1 = makeSet(s1)
    let bigrams2 = makeSet(s2)

    // Two empty multisets are defined to be identical.
    if bigrams1.isEmpty && bigrams2.isEmpty { return 1.0 }

    // Tally the second list once so every lookup below is O(1) instead of a
    // linear `firstIndex(of:)` scan followed by an O(n) `remove(at:)`.
    var remaining: [String: Int] = [:]
    for bigram in bigrams2 {
        remaining[bigram, default: 0] += 1
    }

    // Each bigram of the first list consumes at most one unmatched
    // occurrence from the second list, giving the multiset intersection size.
    var intersectionCount = 0
    for bigram in bigrams1 {
        if let left = remaining[bigram], left > 0 {
            remaining[bigram] = left - 1
            intersectionCount += 1
        }
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|; non-zero here because at least one
    // list is non-empty.
    let unionCount = bigrams1.count + bigrams2.count - intersectionCount
    return Double(intersectionCount) / Double(unionCount)
}
Comment on lines +7 to +25

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

jaccard 함수에서 교집합과 합집합을 계산하는 현재 방식은 배열에 대해 firstIndex(of:)와 remove(at:)를 반복적으로 사용합니다. 이 배열 연산은 최악의 경우 O(N*M)의 시간 복잡도를 가질 수 있어, 입력 크기가 커질수록 성능 저하가 발생할 수 있습니다.
makeSet 함수의 결과를 바이그램별 개수를 담은 딕셔너리(다중집합)로 표현하면, 교집합과 합집합을 O(N+M) 시간 복잡도로 효율적으로 계산할 수 있습니다. 제안된 코드는 다중집합의 합집합 공식인 |A ∪ B| = |A| + |B| - |A ∩ B|를 사용합니다.

/// Computes the multiset Jaccard similarity of two strings in O(N + M).
///
/// - Parameters:
///   - s1: First input string.
///   - s2: Second input string.
/// - Returns: `|A ∩ B| / |A ∪ B|` for the bigram multisets of the two
///   strings, or `1.0` when both multisets are empty.
func jaccard(_ s1: String, _ s2: String) -> Double {
    let bigrams1 = makeSet(s1)
    let bigrams2 = makeSet(s2)

    if bigrams1.isEmpty && bigrams2.isEmpty { return 1.0 }

    // `makeSet` returns a flat list of bigrams, so fold each list into a
    // bigram -> occurrence-count dictionary (a multiset) here.
    let multiset1 = bigrams1.reduce(into: [String: Int]()) { $0[$1, default: 0] += 1 }
    let multiset2 = bigrams2.reduce(into: [String: Int]()) { $0[$1, default: 0] += 1 }

    // Intersection: each shared bigram contributes the smaller of its counts.
    var intersectionCount = 0
    for (bigram, count1) in multiset1 {
        if let count2 = multiset2[bigram] {
            intersectionCount += min(count1, count2)
        }
    }

    // Union: |A ∪ B| = |A| + |B| - |A ∩ B|. It cannot be zero here because
    // the both-empty case has already returned.
    let unionCount = bigrams1.count + bigrams2.count - intersectionCount

    return Double(intersectionCount) / Double(unionCount)
}


// 집합 만들기
func makeSet(_ string: String) -> [String] {
let chars = Array(string.lowercased())
var result: [String] = []

for i in 0..<chars.count-1 {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The makeSet function has a potential Denial of Service (DoS) vulnerability. If an empty string is passed, the range 0..<chars.count-1 becomes 0..<-1, causing a runtime crash in Swift. A robust implementation should handle empty inputs to prevent this. Additionally, for better performance and clearer multiset representation, consider changing makeSet to return a dictionary [String: Int] to store bigram counts, which would also improve the efficiency of jaccard function calculations.

Suggested change
- for i in 0..<chars.count-1 {
+ for i in 0..<max(0, chars.count - 1) {

let c1 = chars[i]
let c2 = chars[i+1]

if c1.isLetter && c2.isLetter {
result.append(String([c1, c2]))
}
}

return result
}