Skip to content

Commit 618734a

Browse files
authored
Deduplicator: Added new comparison method pHash that allows finding "similar images" (d4rken-org#791)
* PHash UI work and setup
* Working PHash implementation. Still need to filter out the checksum results from the PHash results.
* PHash and checksum results as merged clusters, plus deletion and pruning behavior.
1 parent f2b48a3 commit 618734a

40 files changed

+1193
-274
lines changed

app-common-test/src/main/java/testhelpers/coroutine/CoroutinesTestExtension.kt

Lines changed: 0 additions & 24 deletions
This file was deleted.

app-common/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ dependencies {
4646
addIO()
4747
addTesting()
4848

49-
implementation("io.coil-kt:coil:2.0.0-rc02")
49+
implementation("io.coil-kt:coil:${Versions.Coil.core}")
5050

5151
implementation("com.airbnb.android:lottie:3.5.0")
5252

app-common/src/main/java/eu/darken/sdmse/common/hashing/Hasher.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class Hasher(
2828
val type: Type,
2929
val hash: ByteString,
3030
) {
31-
fun formatAs(format: Format): String = when (format) {
31+
fun format(format: Format = Format.HEX): String = when (format) {
3232
Format.HEX -> hash.hex()
3333
Format.BASE64 -> hash.base64()
3434
}

app-common/src/test/java/eu/darken/sdmse/common/hashing/HasherTest.kt

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package eu.darken.sdmse.common.hashing
22

33
import eu.darken.sdmse.common.files.local.deleteAll
4-
import eu.darken.sdmse.common.hashing.Hasher.Result.Format
54
import eu.darken.sdmse.common.hashing.Hasher.Type
65
import io.kotest.matchers.shouldBe
76
import kotlinx.coroutines.test.runTest
@@ -24,54 +23,54 @@ class HasherTest : BaseTest() {
2423
@Test fun `MD5 direct`() = runTest {
2524
"SD Maid 2/SE"
2625
.hash(Type.MD5)
27-
.formatAs(Format.HEX) shouldBe "7604efc2f2f3dd78558b0f54d5fda072"
26+
.format() shouldBe "7604efc2f2f3dd78558b0f54d5fda072"
2827
"SD Maid 2/SE"
2928
.toByteArray()
3029
.hash(Type.MD5)
31-
.formatAs(Format.HEX) shouldBe "7604efc2f2f3dd78558b0f54d5fda072"
30+
.format() shouldBe "7604efc2f2f3dd78558b0f54d5fda072"
3231
}
3332

3433
@Test fun `SHA1 direct`() = runTest {
3534
"SD Maid 2/SE"
3635
.hash(Type.SHA1)
37-
.formatAs(Format.HEX) shouldBe "e0b61786d683c92f873f19bf740f569df0a547e5"
36+
.format() shouldBe "e0b61786d683c92f873f19bf740f569df0a547e5"
3837
"SD Maid 2/SE"
3938
.toByteArray()
4039
.hash(Type.SHA1)
41-
.formatAs(Format.HEX) shouldBe "e0b61786d683c92f873f19bf740f569df0a547e5"
40+
.format() shouldBe "e0b61786d683c92f873f19bf740f569df0a547e5"
4241
}
4342

4443
@Test fun `SHA256 direct`() = runTest {
4544
"SD Maid 2/SE"
4645
.hash(Type.SHA256)
47-
.formatAs(Format.HEX) shouldBe "b3f545cdd32949087fa68f7c2adb3782e21204067912b1b3d893270c94d67d12"
46+
.format() shouldBe "b3f545cdd32949087fa68f7c2adb3782e21204067912b1b3d893270c94d67d12"
4847
"SD Maid 2/SE"
4948
.toByteArray()
5049
.hash(Type.SHA256)
51-
.formatAs(Format.HEX) shouldBe "b3f545cdd32949087fa68f7c2adb3782e21204067912b1b3d893270c94d67d12"
50+
.format() shouldBe "b3f545cdd32949087fa68f7c2adb3782e21204067912b1b3d893270c94d67d12"
5251
}
5352

5453
@Test fun `MD5 from file`() = runTest {
5554
File(testFolder, "MD5")
5655
.apply { "SD Maid 2/SE".writeToFile(this) }
5756
.source()
5857
.hash(Type.MD5)
59-
.formatAs(Format.HEX) shouldBe "7604efc2f2f3dd78558b0f54d5fda072"
58+
.format() shouldBe "7604efc2f2f3dd78558b0f54d5fda072"
6059
}
6160

6261
@Test fun `SHA1 from file`() = runTest {
6362
File(testFolder, "SHA1")
6463
.apply { "SD Maid 2/SE".writeToFile(this) }
6564
.source()
6665
.hash(Type.SHA1)
67-
.formatAs(Format.HEX) shouldBe "e0b61786d683c92f873f19bf740f569df0a547e5"
66+
.format() shouldBe "e0b61786d683c92f873f19bf740f569df0a547e5"
6867
}
6968

7069
@Test fun `SHA256 from file`() = runTest {
7170
File(testFolder, "SHA256")
7271
.apply { "SD Maid 2/SE".writeToFile(this) }
7372
.source()
7473
.hash(Type.SHA256)
75-
.formatAs(Format.HEX) shouldBe "b3f545cdd32949087fa68f7c2adb3782e21204067912b1b3d893270c94d67d12"
74+
.format() shouldBe "b3f545cdd32949087fa68f7c2adb3782e21204067912b1b3d893270c94d67d12"
7675
}
7776
}

app/build.gradle.kts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ dependencies {
170170

171171
implementation("com.airbnb.android:lottie:3.5.0")
172172

173-
implementation("io.coil-kt:coil:2.4.0")
174-
implementation("io.coil-kt:coil-video:2.4.0")
173+
implementation("io.coil-kt:coil:${Versions.Coil.core}")
174+
implementation("io.coil-kt:coil-video:${Versions.Coil.core}")
175175

176176
implementation("androidx.swiperefreshlayout:swiperefreshlayout:1.1.0")
177177
implementation("com.github.reddit:IndicatorFastScroll:f9576c7") // 1.4.0

app/src/main/java/eu/darken/sdmse/common/coil/CoilExtensions.kt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,22 @@ fun ImageView.loadAppIcon(pkg: Pkg): Disposable? {
3939
return context.imageLoader.enqueue(request)
4040
}
4141

42-
fun ImageView.loadFilePreview(lookup: APathLookup<*>): Disposable? {
42+
fun ImageView.loadFilePreview(
43+
lookup: APathLookup<*>,
44+
options: ImageRequest.Builder.(APathLookup<*>) -> Unit = {
45+
val alt = lookup.fileType.iconRes
46+
fallback(alt)
47+
error(alt)
48+
}
49+
): Disposable? {
4350
val current = tag as? APathLookup<*>
4451
if (current?.lookedUp == lookup.lookedUp) return null
4552
tag = lookup
4653

4754
val request = ImageRequest.Builder(context).apply {
4855
data(lookup)
49-
val alt = lookup.fileType.iconRes
50-
fallback(alt)
51-
error(alt)
5256
target(this@loadFilePreview)
57+
options(lookup)
5358
}.build()
5459

5560
return context.imageLoader.enqueue(request)

app/src/main/java/eu/darken/sdmse/common/progress/Progress.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ interface Progress {
4444
return "${ceil(((current.toDouble() / max.toDouble()) * 100)).toInt()}%"
4545
}
4646

47-
fun increment(): Percent {
48-
return Percent(current + 1, max)
47+
fun increment(value: Int = 1): Percent {
48+
return Percent(current + value, max)
4949
}
5050
}
5151

@@ -57,7 +57,7 @@ interface Progress {
5757

5858
override fun displayValue(context: Context): String = "$current/$max"
5959

60-
fun increment() = Counter(current + 1, max)
60+
fun increment(value: Int = 1) = Counter(current + value, max)
6161
}
6262

6363
data class Size(override val current: Long, override val max: Long) : Count {

app/src/main/java/eu/darken/sdmse/common/progress/ProgressExtensions.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ fun <T : Progress.Client> T.updateProgressSecondary(resolv: (Context) -> String)
5353
updateProgress { (it ?: Progress.Data()).copy(secondary = resolv.toCaString()) }
5454
}
5555

56-
fun <T : Progress.Client> T.updateProgressSecondary(secondary: CaString) {
56+
fun <T : Progress.Client> T.updateProgressSecondary(secondary: CaString = CaString.EMPTY) {
5757
updateProgress { (it ?: Progress.Data()).copy(secondary = secondary) }
5858
}
5959

@@ -65,11 +65,11 @@ fun <T : Progress.Client> T.updateProgressCount(count: Progress.Count) {
6565
updateProgress { (it ?: Progress.Data()).copy(count = count) }
6666
}
6767

68-
fun <T : Progress.Client> T.increaseProgress() {
68+
fun <T : Progress.Client> T.increaseProgress(value: Int = 1) {
6969
updateProgress {
7070
when (it?.count) {
71-
is Progress.Count.Counter -> it.copy(count = (it.count as Progress.Count.Counter).increment())
72-
is Progress.Count.Percent -> it.copy(count = (it.count as Progress.Count.Percent).increment())
71+
is Progress.Count.Counter -> it.copy(count = (it.count as Progress.Count.Counter).increment(value))
72+
is Progress.Count.Percent -> it.copy(count = (it.count as Progress.Count.Percent).increment(value))
7373
else -> {
7474
log(ERROR) { "Can't increaseProgress() on type: ${it?.count}" }
7575
it

app/src/main/java/eu/darken/sdmse/deduplicator/core/Deduplicator.kt

Lines changed: 16 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import eu.darken.sdmse.common.sharedresource.keepResourceHoldersAlive
1818
import eu.darken.sdmse.corpsefinder.core.tasks.*
1919
import eu.darken.sdmse.deduplicator.core.deleter.DuplicatesDeleter
2020
import eu.darken.sdmse.deduplicator.core.scanner.DuplicatesScanner
21-
import eu.darken.sdmse.deduplicator.core.scanner.Sleuth
2221
import eu.darken.sdmse.deduplicator.core.scanner.checksum.ChecksumDuplicate
2322
import eu.darken.sdmse.deduplicator.core.scanner.phash.PHashDuplicate
2423
import eu.darken.sdmse.deduplicator.core.tasks.DeduplicatorDeleteTask
@@ -32,12 +31,7 @@ import kotlinx.coroutines.CancellationException
3231
import kotlinx.coroutines.CoroutineScope
3332
import kotlinx.coroutines.flow.Flow
3433
import kotlinx.coroutines.flow.MutableStateFlow
35-
import kotlinx.coroutines.flow.asFlow
3634
import kotlinx.coroutines.flow.combine
37-
import kotlinx.coroutines.flow.filter
38-
import kotlinx.coroutines.flow.map
39-
import kotlinx.coroutines.flow.onEach
40-
import kotlinx.coroutines.flow.toList
4135
import kotlinx.coroutines.sync.Mutex
4236
import kotlinx.coroutines.sync.withLock
4337
import javax.inject.Inject
@@ -47,8 +41,7 @@ import javax.inject.Singleton
4741
@Singleton
4842
class Deduplicator @Inject constructor(
4943
@AppScope private val appScope: CoroutineScope,
50-
private val sleuthFactories: Set<@JvmSuppressWildcards Sleuth.Factory>,
51-
private val gatewaySwitch: GatewaySwitch,
44+
gatewaySwitch: GatewaySwitch,
5245
private val exclusionManager: ExclusionManager,
5346
private val scanner: Provider<DuplicatesScanner>,
5447
private val deleter: Provider<DuplicatesDeleter>,
@@ -108,28 +101,11 @@ class Deduplicator @Inject constructor(
108101

109102
internalData.value = null
110103

111-
val sleuths = sleuthFactories
112-
.asFlow()
113-
.filter { it.isEnabled() }
114-
.map { it.create() }
115-
.onEach { log(TAG) { "Sleuth created: $it" } }
116-
.toList()
117-
118104
val results = scanner.get().withProgress(this) {
119-
scan(sleuths)
105+
scan()
120106
}
121107

122-
log(TAG, INFO) { "performScan():${results.size} clusters found" }
123-
124-
results.forEach { c ->
125-
log(TAG, VERBOSE) { "performScan(): Cluster ${c.identifier}: ${c.groups.size} groups, ${c.count} dupes" }
126-
c.groups.forEach { g ->
127-
log(TAG, VERBOSE) { "performScan(): Group ${g.identifier}: ${g.duplicates.size} dupes" }
128-
g.duplicates.forEach { d ->
129-
log(TAG, VERBOSE) { "performScan(): Duplicate: $d" }
130-
}
131-
}
132-
}
108+
log(TAG, INFO) { "performScan(): ${results.size} clusters found" }
133109

134110
log(TAG) { "Warming up fields..." }
135111
results.forEach { it.totalSize }
@@ -151,13 +127,14 @@ class Deduplicator @Inject constructor(
151127
val snapshot = internalData.value!!
152128

153129
val result = deleter.get().withProgress(this) { delete(task, snapshot) }
130+
154131
updateProgress { Progress.DEFAULT_STATE }
155-
val pruneResult = snapshot.prune(result)
156-
internalData.value = pruneResult.newData
132+
133+
internalData.value = snapshot.prune(result.success.map { it.identifier }.toSet())
157134

158135
return DeduplicatorDeleteTask.Success(
159-
deletedItems = pruneResult.count,
160-
recoveredSpace = pruneResult.freed,
136+
deletedItems = result.success.size,
137+
recoveredSpace = result.success.sumOf { it.size },
161138
)
162139
}
163140

@@ -268,17 +245,7 @@ class Deduplicator @Inject constructor(
268245
}
269246
}
270247

271-
data class PruneResult(
272-
val newData: Deduplicator.Data,
273-
val removed: Collection<Duplicate>,
274-
val freed: Long,
275-
) {
276-
val count: Int
277-
get() = removed.size
278-
}
279-
280-
internal fun Deduplicator.Data.prune(deleter: DuplicatesDeleter.Deleted): PruneResult {
281-
val removedDupes = mutableSetOf<Duplicate>()
248+
internal fun Deduplicator.Data.prune(deletedIds: Set<Duplicate.Id>): Deduplicator.Data {
282249
val newClusters = this.clusters
283250
.asSequence()
284251
.map { oldCluster ->
@@ -287,11 +254,8 @@ internal fun Deduplicator.Data.prune(deleter: DuplicatesDeleter.Deleted): PruneR
287254
.map { oldGroup ->
288255
val newDuplicates: Set<Duplicate> = oldGroup.duplicates
289256
.filter { toDelete ->
290-
val wasDeleted = deleter.success.contains(toDelete.identifier)
291-
if (wasDeleted) {
292-
log(Deduplicator.TAG) { "Prune: Deleted duplicate: $toDelete" }
293-
removedDupes.add(toDelete)
294-
}
257+
val wasDeleted = deletedIds.contains(toDelete.identifier)
258+
if (wasDeleted) log(Deduplicator.TAG) { "Prune: Deleted duplicate: $toDelete" }
295259
!wasDeleted
296260
}
297261
.toSet()
@@ -311,25 +275,19 @@ internal fun Deduplicator.Data.prune(deleter: DuplicatesDeleter.Deleted): PruneR
311275
}
312276
.filter {
313277
// group may be empty after removing duplicates
314-
val solo = it.duplicates.size < 2
315278
val empty = it.duplicates.isEmpty()
316279
if (empty) log(Deduplicator.TAG) { "Prune: Empty group: $it" }
317-
if (solo) log(Deduplicator.TAG) { "Prune: Solo group: $it" }
318-
!solo && !empty
280+
!empty
319281
}
320282
.toSet()
321283
oldCluster.copy(groups = newGroups)
322284
}
323285
.filter {
324-
val clusterEmpty = it.groups.isEmpty()
325-
if (clusterEmpty) log(Deduplicator.TAG) { "Prune: Empty cluster: $it" }
326-
!clusterEmpty
286+
val isSolo = it.count < 2
287+
if (isSolo) log(Deduplicator.TAG) { "Prune: Cluster only has one item: $it" }
288+
!isSolo
327289
}
328290
.toSet()
329291

330-
return PruneResult(
331-
newData = this.copy(clusters = newClusters),
332-
removed = removedDupes,
333-
freed = removedDupes.sumOf { it.size }
334-
)
292+
return this.copy(clusters = newClusters)
335293
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
package eu.darken.sdmse.deduplicator.core
22

33
val Deduplicator.Data?.hasData: Boolean
4-
get() = this?.clusters?.isNotEmpty() ?: false
4+
get() = this?.clusters?.isNotEmpty() ?: false

0 commit comments

Comments
 (0)