-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
сделал мультипроцессинг по задаче Генриха
- Loading branch information
Showing
12 changed files
with
116 additions
and
975 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,3 +32,4 @@ vignettes/*.pdf | |
|
||
*.jpg | ||
*.rar | ||
_phjs/1.js |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
PATH=C:\Program Files (x86)\PuTTY\;c:\Rtools\bin;c:\Rtools\gcc-4.6.3\bin;C:\Python27\;C:\Python27\Scripts;C:\Program Files (x86)\Common Files\Intel\Shared Files\cpp\bin\Intel64;C:\ProgramData\Oracle\Java\javapath;C:\Program Files (x86)\PC Connectivity Solution\;C:\Program Files\Broadcom\Broadcom 802.11 Network Adapter\Driver;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files (x86)\Intel\iCLS Client\;C:\Program Files\Intel\iCLS Client\;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Program Files\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\OpenCL SDK\2.0\bin\x86;C:\Program Files (x86)\Intel\OpenCL SDK\2.0\bin\x64;C:\Program Files\Lenovo\Bluetooth Software\;C:\Program Files\Lenovo\Bluetooth Software\syswow64;C:\Program Files\Lenovo Fingerprint Reader\;C:\Program Files\Lenovo Fingerprint Reader\x86\;C:\ProgramData\Lenovo\ReadyApps;C:\Program Files (x86)\Skype\Phone\;C:\Program Files\EmEditor;C:\Program Files\Microsoft SQL Server\120\Tools\Binn\;C:\Program Files (x86)\Windows Kits\10\Windows Performance Toolkit\;C:\Program Files\Microsoft SQL Server\130\Tools\Binn\;C:\Windows\system32\config\systemprofile\.dnx\bin;C:\Program Files\Microsoft DNX\Dnvm\;C:\Program Files\TortoiseHg\;C:\Users\Ilya\AppData\Roaming\Python\Scripts;C:\Users\Ilya\AppData\Local\atom\bin |
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: CP1251 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
library(dplyr) | ||
library(tidyr) | ||
library(magrittr) | ||
library(stringr) | ||
library(tibble) | ||
library(iterators) | ||
library(foreach) | ||
library(doParallel) | ||
|
||
cat("Готовим данные")

# Build the stemming dictionary: which term to look for (terms.from) and what
# to rewrite it to (terms.to).
# Source terms are random lowercase strings whose length is Poisson(10).
# The length is clamped to at least 1 so that no empty term (and therefore no
# empty "\\b\\b" pattern) can be produced.
terms.from <-
  replicate(n = 800, expr = paste0(sample(
    x = letters,
    size = max(1L, rpois(n = 1, lambda = 10)),
    replace = TRUE
  ), collapse = ""))

# "Sort-of stemming" through base abbreviate(); good enough for an example.
terms.to <- abbreviate(terms.from, method = "left.kept")

dict <-
  data.frame(terms.from,
             terms.to,
             row.names = NULL,
             stringsAsFactors = FALSE)
head(dict)

# Terms that are outside the dictionary (not subject to stemming).
# rnorm() yields a real number that may be fractional or even negative, and
# sample() rejects a negative size, so the length is truncated to a whole
# number and clamped to at least 1.
terms.appendix <-
  replicate(n = 100, expr = paste0(sample(
    x = letters,
    size = max(1, trunc(rnorm(n = 1, mean = 30, sd = 10))),
    replace = TRUE
  ), collapse = ""))

# The corpus to be processed with the dictionary: one document is a
# "sentence" of random word-terms, sorted in decreasing order and joined
# with single spaces.
all.terms <- replicate(n = 8000, expr =
                         paste(sort(
                           c(
                             # a random word from terms.from
                             sample(x = terms.from, size = 1, replace = FALSE),
                             # + a random word from terms.to
                             sample(x = terms.to, size = 1, replace = FALSE),
                             # + a random word from outside the dictionary
                             sample(x = terms.appendix, size = 1, replace = FALSE),
                             # + a random number (coerced to character by c())
                             rf(n = 1, df1 = 10, df2 = 1)
                           )
                           , decreasing = TRUE
                         )
                         , collapse = " "))
|
||
cat("Смотрим базовый подход")
# Baseline: apply the dictionary one entry at a time to the whole corpus and
# time it (author's recorded figure: 86.46 sec).
all.terms2 <- all.terms
# seq_len() keeps the loop from running when dict has no rows
# (1:nrow(dict) would iterate over c(1, 0)).
system.time(for (i in seq_len(nrow(dict))) {
  all.terms2 <-
    gsub(
      # "\\b" on both sides restricts the match to whole words.
      pattern = paste0("\\b", dict$terms.from[i], "\\b"),
      replacement = dict$terms.to[i],
      x = all.terms2,
      ignore.case = TRUE,
      fixed = FALSE
    )
})
# First five documents before and after the replacement, side by side.
data.frame(all.terms[1:5], all.terms2[1:5])
|
||
# Time the parallel dictionary replacement.
# Author's recorded note for this approach: 525.52 sec + NULL on output.
cat("Смотрим параллельный подход")
cores <- detectCores()
if (is.na(cores)) {
  # detectCores() returns NA when the core count cannot be determined.
  cores <- 1L
}
#registerDoParallel(detectCores() - 1)
registerDoParallel(cores)
getDoParWorkers()

# One row per document. `id` records the original position so the result can
# be put back in corpus order after the chunks are processed.
t <- tibble(value = all.terms)
# Documents are binned into `cores` chunks by character length (ntile), then
# each chunk is nested into its own data frame holding `value` and `id`.
all.terms2 <- t %>%
  mutate(id = row_number(), l = nchar(value), thread = ntile(l, n = cores)) %>%
  select(-l) %>%
  group_by(thread) %>%
  nest()

# The word-boundary patterns do not depend on the chunk, so build them once.
dict.patterns <- paste0("\\b", dict$terms.from, "\\b")

system.time(res <-
              foreach(it = iter(all.terms2$data), .combine = 'c') %dopar% {
                temp.val <- it$value
                # temp.val <- paste(it$value, collapse = '\n');
                # If the chunk is collapsed like this, the documents have to be
                # split back out afterwards with
                # stringr::str_extract_all(tempval, "[^\n]+\n")
                for (i in seq_len(nrow(dict))) {
                  temp.val <-
                    gsub(
                      pattern = dict.patterns[i],
                      replacement = dict$terms.to[i],
                      x = temp.val,
                      ignore.case = TRUE,
                      fixed = FALSE
                    )
                }
                temp.val
              })

# The chunks come back concatenated in chunk order, which is not the order of
# all.terms; use the saved ids to restore the original document order.
res.ids <- unlist(lapply(all.terms2$data, function(chunk) chunk$id))
res <- res[order(res.ids)]

# Release the workers started by registerDoParallel().
stopImplicitCluster()
14 changes: 0 additions & 14 deletions
14
_phjs/file4ac030a64c90_files/DiagrammeR-styles-0.2/styles.css
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.