10.3 Extraindo altura e dap
Funções para extrair altura e diâmetro à altura do peito (DAP) de colunas descritivas.
10.3.1 Altura
pegaaltura <- function(x) {
x <- gsub("\\+/-", "", x)
x <- gsub(" ", " ", x)
x <- gsub(" ", " ", x)
# print(x)
pt1 <- "[-+]?[0-9]*[,.]?[0-9] m x"
pt2 <- "[-+]?[0-9]*[,.]?[0-9]m x"
pt3 <- "[-+]?[0-9]*[,.]?[0-9] m\\. x"
pt4 <- "[-+]?[0-9]*[,.]?[0-9] m de altura"
pt5 <- "[-+]?[0-9]*[,.]?[0-9] m\\. de altura"
pt6 <- "[-+]?[0-9]*[,.]?[0-9]m d ealtura"
pt7 <- "[-+]?[0-9]*[,.]?[0-9]m de alto"
pt8 <- "[-+]?[0-9]*[,.]?[0-9] m de alto"
pt9 <- "[-+]?[0-9]*[,.]?[0-9]m alto"
pt10 <- "[-+]?[0-9]*[,.]?[0-9]m de altura"
pt11 <- "[-+]?[0-9]*[,.]?[0-9] m tall"
pt12 <- "[-+]?[0-9]*[,.]?[0-9]m tall"
pt13 <- "[-+]?[0-9]*[,.]?[0-9] m\\. tall"
pt14 <- "[-+]?[0-9]*[,.]?[0-9] m\\. Tall"
pt15 <- "[-+]?[0-9]*[,.]?[0-9]m\\. tall"
pt16 <- "Tree, [-+]?[0-9]*[,.]?[0-9]m"
pt17 <- "Tree, [-+]?[0-9]*[,.]?[0-9] m"
pt18 <- "Tree [-+]?[0-9]*[,.]?[0-9] m"
pt19 <- "Tree [-+]?[0-9]*[,.]?[0-9]m"
pt20 <- "to [-+]?[0-9]*v?[0-9] m"
pt21 <- "Tree [0-9]*-[0-9]*m"
pt22 <- "de [-+]?[0-9]*[,.]?[0-9] m"
pt23 <- "de [-+]?[0-9]*v?[0-9]m"
pt24 <- "de [0-9]*-[0-9]* m"
pt25 <- "Tree [0-9]*-[0-9]* m"
pt26 <- "de [0-9]*-[0-9]*m"
pt27 <- "alt. [-+]?[0-9]*[,.]?[0-9] m"
pt28 <- "[-+]?[0-9]*[,.]?[0-9] feet high"
pt29 <- "[-+]?[0-9]*[,.]?[0-9] ft. high"
pt30 <- "rvore [-+]?[0-9]*[,.]?[0-9]m"
pt31 <- "rvore [-+]?[0-9]*[,.]?[0-9] m"
pt32 <- "rvore, [-+]?[0-9]*[,.]?[0-9] m"
pt33 <- "rvore, [-+]?[0-9]*[,.]?[0-9]m"
pt34 <- "rbusto [-+]?[0-9]*[,.]?[0-9]m"
pt35 <- "rbusto [-+]?[0-9]*[,.]?[0-9] m"
pt36 <- "rbusto, [-+]?[0-9]*[,.]?[0-9] m"
pt37 <- "rbusto, [-+]?[0-9]*[,.].?[0-9]m"
pt38 <- "rvore +/- [-+]?[0-9]*[,.].?[0-9]m"
pt39 <- "rvore ca. [-+]?[0-9]*[,.].?[0-9]m"
pt40 <- "de [-+]?[0-9]*[,.].?[0-9] de altura"
pt41 <- "altura [-+]?[0-9]*[,.].?[0-9]m"
pt42 <- "altura [-+]?[0-9]*[,.].?[0-9] m"
pt43 <- "of [-+]?[0-9]*[,.].?[0-9]m"
pt44 <- "to [-+]?[0-9]*[,.].?[0-9]m"
pt45 <- "Treelet [-+]?[0-9]*[,.].?[0-9]m"
pt46 <- "rvore [-+]?[0-9]*[,.].?[0-9] m"
pt47 <- "rvore, [-+]?[0-9]*[,.].?[0-9] m"
pt48 <- "rvoreta, [-+]?[0-9]*[,.].?[0-9] m"
pt49 <- "Fuste= [-+]?[0-9]*[,.].?[0-9] m"
pt50 <- "Fuste= [-+]?[0-9]*[,.].?[0-9]m"
pt51 <- "com [-+]?[0-9]*[,.].?[0-9] m. alt."
pt52 <- "Height: [-+]?[0-9]*[,.].?[0-9] m"
pt53 <- "Arbol [-+]?[0-9]*[,.].?[0-9]m"
pt54 <- "Treelet, [-+]?[0-9]*[,.].?[0-9]m"
pt55 <- "altura = [-+]?[0-9]*[,.].?[0-9]m"
pt56 <- "Fuste = [-+]?[0-9]*[,.].?[0-9]m"
pt57 <- "Fuste = [-+]?[0-9]*[,.].?[0-9] m"
altura <- NA
for (p in 1:57) {
pt <- get(paste("pt", p, sep = ""))
gp <- grep(pt, x, ignore.case = F)
if (length(gp) > 0 & is.na(altura)) {
# print(p)
rmm <- strsplit(x, pt)[[1]]
rmm <- rmm[rmm != "" & rmm != "." & !is.na(rmm)]
xx <- x
if (length(rmm) > 0) {
for (r in length(rmm):1) {
xx <- gsub(rmm[r], "", xx, fixed = T, useBytes = T)
}
}
xx <- trimws(gsub("[A-Z]|\\(|\\)|:|=", "", xx, ignore.case = T), which = "both")
xx <- gsub(",", ".", xx)
tt <- grep("-", xx)
if (length(tt) > 0) {
xxx <- strsplit(xx, "-")[[1]]
xxx <- xxx[xxx != ""]
xxx <- gsub(" \\.", "", xxx)
xx <- mean(as.numeric(trimws(xxx, which = "both")), na.rm = T)
} else {
xx <- strsplit(xx, " ")[[1]]
xx <- trimws(xx, which = "both")
xx <- xx[xx != "."]
xx <- xx[1]
}
xx <- as.numeric(xx)
if (!is.na(xx) && xx > 0) {
altura <- xx
} else {
altura <- NA
}
}
}
return(altura)
}
10.3.2 DAP
pegadap <- function(x) {
x <- gsub("\\+/-", "", x)
x <- gsub(" ", " ", x)
x <- gsub(" ", " ", x)
# print(x)
pt1 <- "x [-+]?[0-9]*[,.]?[0-9] cm de circ"
pt2 <- "x [-+]?[0-9]*[,.]?[0-9] cm de di"
pt3 <- "x [-+]?[0-9]*[,.]?[0-9]cm de di"
pt4 <- "x [-+]?[0-9]*[,.]?[0-9]cm di"
pt5 <- "m x [-+]?[0-9]*[,.]?[0-9] cm DAP"
pt6 <- "m x [-+]?[0-9]*[,.]?[0-9] cm"
pt7 <- "de [-+]?[0-9]*[,.]?[0-9] m de DAP"
pt8 <- "[-+]?[0-9]*[,.]?[0-9] cm de DAP"
pt9 <- "[-+]?[0-9]*[,.]?[0-9] cm (DAP)"
pt10 <- "[-+]?[0-9]*[,.]?[0-9] cm dbh"
pt11 <- "DAP [-+]?[0-9]*[,.]?[0-9] cm"
pt12 <- "[-+]?[0-9]*[,.]?[0-9] cm D.A.P."
pt13 <- "D.A.P. = [-+]?[0-9]*[,.]?[0-9] cm"
pt14 <- "[-+]?[0-9]*[,.]?[0-9]cm de di"
pt15 <- "[-+]?[0-9]*[,.]?[0-9]cm dap"
pt16 <- "[-+]?[0-9]*[,.]?[0-9]cm. dia"
pt17 <- "dbh. [-+]?[0-9]*[,.]?[0-9]cm"
pt18 <- "[-+]?[0-9]*[,.]?[0-9]cm. in dia"
pt19 <- "[-+]?[0-9]*[,.]?[0-9] cm de di"
pt20 <- "[-+]?[0-9]*[,.]?[0-9] cm (DAP)"
pt21 <- "DBH [-+]?[0-9]*[,.]?[0-9] cm"
# x="Árvore de 13m de altura x 11cm de diâmetro do fuste."
altura <- NA
for (p in 1:21) {
pt <- get(paste("pt", p, sep = ""))
gp <- grep(pt, x, ignore.case = F)
if (length(gp) == 1 & is.na(altura)) {
# print(p)
rmm <- strsplit(x, pt)[[1]]
rmm <- rmm[rmm != "" & rmm != "." & !is.na(rmm)]
xx <- x
if (length(rmm) == 1) {
for (r in length(rmm):1) {
xx <- gsub(rmm[r], "", xx, fixed = T, useBytes = T)
}
} else {
if (length(rmm) == 2) {
n1 <- nchar(rmm[1])
n2 <- nchar(rmm[2])
n0 <- nchar(x)
ns <- n1 + 1
nt <- n0 - n2
xx <- substr(x, ns, nt)
} else {
if (length(rmm) > 2) {
xx <- NA
}
}
}
if (!is.na(xx)) {
xx <- trimws(gsub("[A-Z]|\\(|\\)|:|=", "", xx, ignore.case = T), which = "both")
xx <- gsub(",", ".", xx)
tt <- grep("-", xx)
if (length(tt) > 0) {
xxx <- strsplit(xx, "-")[[1]]
xxx <- xxx[xxx != ""]
xxx <- gsub(" \\.", "", xxx)
xx <- mean(as.numeric(trimws(xxx, which = "both")), na.rm = T)
} else {
xx <- strsplit(xx, " ")[[1]]
xx <- trimws(xx, which = "both")
xx <- xx[xx != "." & xx != "" & xx != "..."]
xx <- xx[1]
if (substr(xx, nchar(xx), nchar(xx)) == ".") {
xx <- substr(xx, 1, nchar(xx) - 1)
}
}
xx <- as.numeric(xx)
if (xx > 0) {
altura <- xx
}
}
}
}
return(altura)
}