Skip to content

Commit

Permalink
restrict maximum productions
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Feb 13, 2024
1 parent e8ffb38 commit a8137e9
Showing 1 changed file with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,13 @@ infix fun CFG.jvmIntersectLevFSA(fsa: FSA): CFG = jvmIntersectLevFSAP(fsa)

val BH_TIMEOUT = 9.minutes
val MINFREEMEM = 1000000000L
- val MAX_NTS = 4_000_000 // Give each nonterminal about ~35kb of memory
+ val MAX_NTS = 4_000_000 // Gives each nonterminal about ~35kb of memory on Xmx=150GB
+ val MAX_PRODS = 200_000_000

val maxNTsSeen = AtomicInteger(0)

private infix fun CFG.jvmIntersectLevFSAP(fsa: FSA): CFG {
- if (fsa.Q.size < 650) throw Exception("FSA size was out of bounds")
+ // if (fsa.Q.size < 650) throw Exception("FSA size was out of bounds")
var clock = TimeSource.Monotonic.markNow()

val lengthBoundsCache = lengthBounds
Expand All @@ -163,13 +164,14 @@ private infix fun CFG.jvmIntersectLevFSAP(fsa: FSA): CFG {
val binaryProds =
prods.parallelStream().flatMap {
// if (i++ % 100 == 0) println("Finished $i/${nonterminalProductions.size} productions")
- if (BH_TIMEOUT < clock.elapsedNow() || MAX_NTS < nts.size) throw Exception("Timeout: ${nts.size} nts")
+ if (BH_TIMEOUT < clock.elapsedNow()) throw Exception("Timeout: ${nts.size} nts")
val (A, B, C) = it.π1 to it.π2[0] to it.π2[1]
validTriples.stream()
// CFG ∩ FSA - in general we are not allowed to do this, but it works
// because we assume a Levenshtein FSA, which is monotone and acyclic.
.filter { it.isCompatibleWith(A to B to C, fsa, lengthBoundsCache) }
.map { (a, b, c) ->
+ if (MAX_PRODS < counter.incrementAndGet()) throw Exception("Too many productions!")
val (p, q, r) = a.π1 to b.π1 to c.π1
"[$p~$A~$r]".also { nts.add(it) } to listOf("[$p~$B~$q]", "[$q~$C~$r]")
}
Expand All @@ -179,7 +181,7 @@ private infix fun CFG.jvmIntersectLevFSAP(fsa: FSA): CFG {
first() == '[' && length > 1 // && last() == ']' && count { it == '~' } == 2

val totalProds = binaryProds.size + transits.size + unitProds.size + initFinal.size
- println("Constructed ∩-grammar with $totalProds in ${clock.elapsedNow()}")
+ println("Constructed ∩-grammar with $totalProds productions in ${clock.elapsedNow()}")

clock = TimeSource.Monotonic.markNow()
return Stream.concat(binaryProds.stream(),
Expand Down

0 comments on commit a8137e9

Please sign in to comment.