Skip to content

Commit

Permalink
disable length bounds precomputation
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Apr 14, 2024
1 parent 127e3c8 commit ff81d59
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package ai.hypergraph.kaliningraph.parsing

import ai.hypergraph.kaliningraph.automata.*
import ai.hypergraph.kaliningraph.hash
import ai.hypergraph.kaliningraph.repair.MAX_TOKENS
import ai.hypergraph.kaliningraph.types.*
import ai.hypergraph.kaliningraph.types.times
import kotlin.math.*
import kotlin.random.Random
import kotlin.time.TimeSource

/**
Expand Down Expand Up @@ -53,8 +51,8 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG {
// we have the production [p,A,r] → [p,B,q] [q,C,r] in P′.
val prods: Set<Pair<Int, List<Int>>> = nonterminalProductions
.map { (a, bc) -> ntMap[a]!! to bc.map { ntMap[it]!! } }.toSet()
val lengthBoundsCache = lengthBounds.let { lb -> ntLst.map { lb[it] ?: 0..0 } }
val validTriples: List<Triple<STC, STC, STC>> = fsa.validTriples
// val lengthBoundsCache = lengthBounds.let { lb -> ntLst.map { lb[it] ?: 0..0 } }
val validTriples = fsa.validTriples.map { arrayOf(it.π11, it.π21, it.π31) }.toTypedArray()

val ct = (fsa.validPairs * nonterminals.indices.toSet()).toList()
// val ct1 = Array(fsa.states.size) { Array(nonterminals.size) { Array(fsa.states.size) { false } } }
Expand All @@ -68,15 +66,15 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG {
prods.map {
// if (i % 100 == 0) println("Finished ${i}/${nonterminalProductions.size} productions")
val (A, B, C) = it.π1 to it.π2[0] to it.π2[1]
val trip = A to B to C
val trip = arrayOf(A, B, C)
validTriples
// CFG ∩ FSA - in general we are not allowed to do this, but it works
// because we assume a Levenshtein FSA, which is monotone and acyclic.
// .filter { it.checkCT(trip, ct1) }
.filter { it.checkCT(trip, ct2) }
.filter { it.checkCompatibility(trip, ct2) }
// .filter { it.obeysLevenshteinParikhBounds(A to B to C, fsa, parikhMap) }
.map { (a, b, c) ->
val (p, q, r) = fsa.stateLst[a1] to fsa.stateLst[b1] to fsa.stateLst[c1]
val (p, q, r) = fsa.stateLst[a] to fsa.stateLst[b] to fsa.stateLst[c]
"[$p~${ntLst[A]}~$r]".also { nts.add(it) } to listOf("[$p~${ntLst[B]}~$q]", "[$q~${ntLst[C]}~$r]")
}.toList()
}.flatten().filterRHSInNTS()
Expand Down Expand Up @@ -291,7 +289,7 @@ fun Π3A<STC>.isCompatibleWith(nts: Π3A<Int>, fsa: FSA, lengthBounds: List<IntR
&& lengthBounds[nts.second].overlaps(fsa.SPLP(first, second))
&& lengthBounds[nts.third].overlaps(fsa.SPLP(second, third))

/**
 * Returns true only when all three lookups into the Boolean table [ct] are true.
 * Each lookup has the form ct[x][n][y], where x and y come from the receiver
 * triple and n comes from [nts]; the lookups are joined with `&&`, so a false
 * entry short-circuits the remaining ones.
 *
 * NOTE(review): the index names used below (`π11`, `nts1`, `π31`, ...) look like
 * nested projections whose separators were lost (presumably `π1.π1`, `nts.π1`,
 * `π3.π1`) — confirm against the original file before relying on them.
 */
fun Π3A<STC>.checkCT(nts: Π3A<Int>, ct: Array<Array<Array<Boolean>>>): Boolean =
  ct[π11][nts1][π31] &&
  ct[π11][nts2][π21] &&
  ct[π21][nts3][π31]
/**
 * Tests whether an index triple is compatible with a triple of nonterminal
 * indices according to the lookup table [ct].
 *
 * Writing the receiver as (p, q, r) and [nts] as (A, B, C), the result is
 * `ct[p][A][r] && ct[p][B][q] && ct[q][C][r]`. At the visible call sites this
 * corresponds to a candidate production [p,A,r] → [p,B,q] [q,C,r].
 *
 * The three lookups are evaluated in that order and evaluation stops at the
 * first false entry, so later elements of the arrays are not read once the
 * answer is known.
 *
 * @param nts the three nonterminal indices (A, B, C), used as the middle index of [ct]
 * @param ct Boolean table addressed as ct[first][middle][last]
 * @return true iff all three table entries are true
 */
fun Array<Int>.checkCompatibility(nts: Array<Int>, ct: Array<Array<Array<Boolean>>>): Boolean {
  // [p, A, r]: the parent nonterminal must span from the first to the last index.
  if (!ct[this[0]][nts[0]][this[2]]) return false
  // [p, B, q]: the left child must span from the first to the middle index.
  if (!ct[this[0]][nts[1]][this[1]]) return false
  // [q, C, r]: the right child must span from the middle to the last index.
  return ct[this[1]][nts[2]][this[2]]
}
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,17 @@ private fun CFG.jvmIntersectLevFSAP(fsa: FSA, parikhMap: ParikhMap): CFG {
// we have the production [p,A,r] → [p,B,q] [q,C,r] in P′.
val prods = nonterminalProductions
.map { (a, b) -> ntMap[a]!! to b.map { ntMap[it]!! } }.toSet()
val lengthBoundsCache = lengthBounds.let { lb -> nonterminals.map { lb[it] ?: 0..0 } }
val validTriples: List<Triple<STC, STC, STC>> = fsa.validTriples
// val lengthBoundsCache = lengthBounds.let { lb -> nonterminals.map { lb[it] ?: 0..0 } }
val validTriples = fsa.validTriples.map { arrayOf(it.π11, it.π21, it.π31) }

val ct = (fsa.validPairs * nonterminals.indices.toSet()).toList()
val ct2 = Array(fsa.states.size) { Array(nonterminals.size) { Array(fsa.states.size) { false } } }
ct.filter { lengthBoundsCache[it.π3].overlaps(fsa.SPLP(it.π1, it.π2)) && fsa.obeys(it.π1, it.π2, it.π3, parikhMap) }
.forEach { ct2[it.π11][it.π3][it.π21] = true }
ct.parallelStream()
.filter { fsa.obeys(it.π1, it.π2, it.π3, parikhMap) }
.toList().also {
val fraction = it.size.toDouble() / (fsa.states.size * nonterminals.size * fsa.states.size)
println("Fraction of valid triples: $fraction")
}.forEach { ct2[it.π11][it.π3][it.π21] = true }

val elimCounter = AtomicInteger(0)
val counter = AtomicInteger(0)
Expand All @@ -182,17 +186,17 @@ private fun CFG.jvmIntersectLevFSAP(fsa: FSA, parikhMap: ParikhMap): CFG {
prods.parallelStream().flatMap {
if (BH_TIMEOUT < clock.elapsedNow()) throw Exception("Timeout: ${nts.size} nts")
val (A, B, C) = it.π1 to it.π2[0] to it.π2[1]
val trip = A to B to C
val trip = arrayOf(A, B, C)
validTriples.stream()
// CFG ∩ FSA - in general we are not allowed to do this, but it works
// because we assume a Levenshtein FSA, which is monotone and acyclic.
// .filter { it.isCompatibleWith(A to B to C, fsa, lengthBoundsCache).also { if (!it) elimCounter.incrementAndGet() } }
// .filter { it.checkCT(trip, ct1).also { if (!it) elimCounter.incrementAndGet() } }
// .filter { it.obeysLevenshteinParikhBounds(A to B to C, fsa, parikhMap).also { if (!it) elimCounter.incrementAndGet() } }
.filter { it.checkCT(trip, ct2).also { if (!it) elimCounter.incrementAndGet() } }
.filter { it.checkCompatibility(trip, ct2).also { if (!it) elimCounter.incrementAndGet() } }
.map { (a, b, c) ->
if (MAX_PRODS < counter.incrementAndGet()) throw Exception("∩-grammar has too many productions! (>$MAX_PRODS)")
val (p, q, r) = fsa.stateLst[a1] to fsa.stateLst[b1] to fsa.stateLst[c1]
val (p, q, r) = fsa.stateLst[a] to fsa.stateLst[b] to fsa.stateLst[c]
"[$p~${ntLst[A]}~$r]".also { nts.add(it) } to listOf("[$p~${ntLst[B]}~$q]", "[$q~${ntLst[C]}~$r]")
}
}.toList()
Expand Down

0 comments on commit ff81d59

Please sign in to comment.