Skip to content

Commit

Permalink
сделал мультипроцессинг по задаче Генриха
Browse files Browse the repository at this point in the history
  • Loading branch information
iMissile committed Sep 21, 2016
1 parent 6428b6a commit 8e28197
Show file tree
Hide file tree
Showing 12 changed files with 116 additions and 975 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ vignettes/*.pdf

*.jpg
*.rar
_phjs/1.js
1 change: 1 addition & 0 deletions 50 Rmd_learning/1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PATH=C:\Program Files (x86)\PuTTY\;c:\Rtools\bin;c:\Rtools\gcc-4.6.3\bin;C:\Python27\;C:\Python27\Scripts;C:\Program Files (x86)\Common Files\Intel\Shared Files\cpp\bin\Intel64;C:\ProgramData\Oracle\Java\javapath;C:\Program Files (x86)\PC Connectivity Solution\;C:\Program Files\Broadcom\Broadcom 802.11 Network Adapter\Driver;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files (x86)\Intel\iCLS Client\;C:\Program Files\Intel\iCLS Client\;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Program Files\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\OpenCL SDK\2.0\bin\x86;C:\Program Files (x86)\Intel\OpenCL SDK\2.0\bin\x64;C:\Program Files\Lenovo\Bluetooth Software\;C:\Program Files\Lenovo\Bluetooth Software\syswow64;C:\Program Files\Lenovo Fingerprint Reader\;C:\Program Files\Lenovo Fingerprint Reader\x86\;C:\ProgramData\Lenovo\ReadyApps;C:\Program Files (x86)\Skype\Phone\;C:\Program Files\EmEditor;C:\Program Files\Microsoft SQL Server\120\Tools\Binn\;C:\Program Files (x86)\Windows Kits\10\Windows Performance Toolkit\;C:\Program Files\Microsoft SQL Server\130\Tools\Binn\;C:\Windows\system32\config\systemprofile\.dnx\bin;C:\Program Files\Microsoft DNX\Dnvm\;C:\Program Files\TortoiseHg\;C:\Users\Ilya\AppData\Roaming\Python\Scripts;C:\Users\Ilya\AppData\Local\atom\bin
Binary file added 50 Rmd_learning/graph.pdf
Binary file not shown.
Binary file added 50 Rmd_learning/my.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added 50 Rmd_learning/webshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions 51 habr/51 habr.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: CP1251

RnwWeave: Sweave
LaTeX: pdfLaTeX
101 changes: 101 additions & 0 deletions 51 habr/genrih.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
library(dplyr)
library(tidyr)
library(magrittr)
library(stringr)
library(tibble)
library(iterators)
library(foreach)
library(doParallel)

cat("Ãîòîâèì äàííûå")

# Build the stemming dictionary: which term to look for (terms.from) and what
# to replace it with (terms.to).
terms.from <-
  replicate(n = 800, expr = paste0(sample(
    x = letters,
    # rpois() can return 0, which would yield an empty-string term;
    # force every term to contain at least one letter.
    size = max(1L, rpois(n = 1, lambda = 10)),
    replace = TRUE
  ), collapse = "")) # the term to look for

# Emulate "stemming" with base abbreviate(); good enough for this example.
terms.to <- abbreviate(terms.from, method = "left.kept")

# Two-column lookup table: terms.from -> terms.to (plain character columns).
dict <-
  data.frame(terms.from,
    terms.to,
    row.names = NULL,
    stringsAsFactors = FALSE
  )
head(dict)

# Generate terms that are NOT in the dictionary (they must not be stemmed).
terms.appendix <-
  replicate(n = 100, expr = paste0(sample(
    x = letters,
    # rnorm() can return zero or a negative value; sample() fails on a
    # negative size and yields an empty term for zero. Truncate to an integer
    # (as sample() itself does for fractional sizes) and clamp to >= 1.
    size = max(1L, as.integer(rnorm(n = 1, mean = 30, sd = 10))),
    replace = TRUE
  ), collapse = ""))

# Generate the corpus of documents to be processed with the dictionary:
# one document = one sentence made of random word-terms.
all.terms <- vapply(
  seq_len(8000),
  function(doc_index) {
    # The draws happen in a fixed order: dictionary source term, dictionary
    # target term, out-of-dictionary term, then a random number.
    from_word <- sample(x = terms.from, size = 1, replace = FALSE)
    to_word <- sample(x = terms.to, size = 1, replace = FALSE)
    outside_word <- sample(x = terms.appendix, size = 1, replace = FALSE)
    random_number <- rf(n = 1, df1 = 10, df2 = 1)

    # c() coerces the number to character; the words are then sorted in
    # decreasing order and glued into a single space-separated sentence.
    words <- c(from_word, to_word, outside_word, random_number)
    paste(sort(words, decreasing = TRUE), collapse = " ")
  },
  character(1)
)

cat("Ñìîòðèì áàçîâûé ïîäõîä")
# Time the baseline approach: replace dictionary terms one at a time with a
# sequential loop over the dictionary rows.
# (Original author's measurement: 86.46 sec.)
all.terms2 <- all.terms
# seq_len() makes the loop a no-op for an empty dictionary; 1:nrow(dict) would
# iterate over c(1, 0) and replace with NA.
system.time(for (i in seq_len(nrow(dict))) {
  all.terms2 <-
    gsub(
      # Whole-word, case-insensitive match of the i-th source term.
      pattern = paste0("\\b", dict$terms.from[i], "\\b"),
      replacement = dict$terms.to[i],
      x = all.terms2,
      ignore.case = TRUE,
      fixed = FALSE
    )
})
# Show the first few documents before and after replacement.
data.frame(all.terms[1:5], all.terms2[1:5])

# Time the same dictionary replacement executed in parallel via foreach.
# (Original author's note: 525.52 sec + NULL on output.)
cat("Ñìîòðèì ïàðàëëåëüíûé ïîäõîä")
# detectCores() may return NA when the core count cannot be determined;
# fall back to a single worker in that case.
cores <- detectCores()
if (is.na(cores)) {
  cores <- 1L
}
#registerDoParallel(detectCores() - 1)
registerDoParallel(cores)
getDoParWorkers()

# Split the corpus into one chunk per worker, bucketing documents by their
# character length. `id` records each document's original position so the
# combined result can be put back into input order afterwards.
t <- tibble(value = all.terms)
all.terms2 <- t %>%
  mutate(id = row_number(), l = nchar(value), thread = ntile(l, n = cores)) %>%
  select(-l) %>%
  group_by(thread) %>%
  nest()

system.time(res <-
  foreach(it = iter(all.terms2$data), .combine = "c") %dopar% {
    temp.val <- it$value
    # temp.val <- paste(it$value, collapse = '\n');
    # if we collapse like this, the pieces must later be extracted with
    # stringr::str_extract_all(tempval, "[^\n]+\n")
    cat("----\n")
    str(it)
    # seq_len() keeps the loop a no-op for an empty dictionary.
    for (i in seq_len(nrow(dict))) {
      temp.val <-
        gsub(
          # Whole-word, case-insensitive match of the i-th source term.
          pattern = paste0("\\b", dict$terms.from[i], "\\b"),
          replacement = dict$terms.to[i],
          x = temp.val,
          ignore.case = TRUE,
          fixed = FALSE
        )
    }
    cat("====\n")
    str(temp.val)
    # The chunk's processed documents are the value of this iteration.
    temp.val
  })

# Release the workers started implicitly by registerDoParallel(cores).
stopImplicitCluster()

# foreach concatenates chunk results in chunk order; reorder them so that
# res[k] corresponds to all.terms[k].
chunk.ids <- unlist(lapply(all.terms2$data, function(chunk) chunk$id))
res <- res[order(chunk.ids)]
14 changes: 0 additions & 14 deletions _phjs/file4ac030a64c90_files/DiagrammeR-styles-0.2/styles.css

This file was deleted.

71 changes: 0 additions & 71 deletions _phjs/file4ac030a64c90_files/grViz-binding-0.8.4/grViz.js

This file was deleted.

Loading

0 comments on commit 8e28197

Please sign in to comment.