10.3 Extraindo altura e dap

Funções para extrair altura e diâmetro à altura do peito (DAP) de colunas descritivas.

10.3.1 Altura

pegaaltura <- function(x) {
  x <- gsub("\\+/-", "", x)
  x <- gsub("  ", " ", x)
  x <- gsub("  ", " ", x)
  # print(x)
  pt1 <- "[-+]?[0-9]*[,.]?[0-9] m x"
  pt2 <- "[-+]?[0-9]*[,.]?[0-9]m x"
  pt3 <- "[-+]?[0-9]*[,.]?[0-9] m\\. x"
  pt4 <- "[-+]?[0-9]*[,.]?[0-9] m de altura"
  pt5 <- "[-+]?[0-9]*[,.]?[0-9] m\\. de altura"
  pt6 <- "[-+]?[0-9]*[,.]?[0-9]m d ealtura"
  pt7 <- "[-+]?[0-9]*[,.]?[0-9]m de alto"
  pt8 <- "[-+]?[0-9]*[,.]?[0-9] m de alto"
  pt9 <- "[-+]?[0-9]*[,.]?[0-9]m alto"
  pt10 <- "[-+]?[0-9]*[,.]?[0-9]m de altura"
  pt11 <- "[-+]?[0-9]*[,.]?[0-9] m tall"
  pt12 <- "[-+]?[0-9]*[,.]?[0-9]m tall"
  pt13 <- "[-+]?[0-9]*[,.]?[0-9] m\\. tall"
  pt14 <- "[-+]?[0-9]*[,.]?[0-9] m\\. Tall"
  pt15 <- "[-+]?[0-9]*[,.]?[0-9]m\\. tall"
  pt16 <- "Tree, [-+]?[0-9]*[,.]?[0-9]m"
  pt17 <- "Tree, [-+]?[0-9]*[,.]?[0-9] m"
  pt18 <- "Tree [-+]?[0-9]*[,.]?[0-9] m"
  pt19 <- "Tree [-+]?[0-9]*[,.]?[0-9]m"
  pt20 <- "to [-+]?[0-9]*v?[0-9] m"
  pt21 <- "Tree [0-9]*-[0-9]*m"
  pt22 <- "de [-+]?[0-9]*[,.]?[0-9] m"
  pt23 <- "de [-+]?[0-9]*v?[0-9]m"
  pt24 <- "de [0-9]*-[0-9]* m"
  pt25 <- "Tree [0-9]*-[0-9]* m"
  pt26 <- "de [0-9]*-[0-9]*m"
  pt27 <- "alt. [-+]?[0-9]*[,.]?[0-9] m"
  pt28 <- "[-+]?[0-9]*[,.]?[0-9] feet high"
  pt29 <- "[-+]?[0-9]*[,.]?[0-9] ft. high"

  pt30 <- "rvore [-+]?[0-9]*[,.]?[0-9]m"
  pt31 <- "rvore [-+]?[0-9]*[,.]?[0-9] m"
  pt32 <- "rvore, [-+]?[0-9]*[,.]?[0-9] m"
  pt33 <- "rvore, [-+]?[0-9]*[,.]?[0-9]m"

  pt34 <- "rbusto [-+]?[0-9]*[,.]?[0-9]m"
  pt35 <- "rbusto [-+]?[0-9]*[,.]?[0-9] m"
  pt36 <- "rbusto, [-+]?[0-9]*[,.]?[0-9] m"
  pt37 <- "rbusto, [-+]?[0-9]*[,.].?[0-9]m"
  pt38 <- "rvore +/- [-+]?[0-9]*[,.].?[0-9]m"
  pt39 <- "rvore ca. [-+]?[0-9]*[,.].?[0-9]m"
  pt40 <- "de [-+]?[0-9]*[,.].?[0-9] de altura"
  pt41 <- "altura [-+]?[0-9]*[,.].?[0-9]m"
  pt42 <- "altura [-+]?[0-9]*[,.].?[0-9] m"
  pt43 <- "of [-+]?[0-9]*[,.].?[0-9]m"
  pt44 <- "to [-+]?[0-9]*[,.].?[0-9]m"
  pt45 <- "Treelet [-+]?[0-9]*[,.].?[0-9]m"
  pt46 <- "rvore [-+]?[0-9]*[,.].?[0-9] m"
  pt47 <- "rvore, [-+]?[0-9]*[,.].?[0-9] m"
  pt48 <- "rvoreta, [-+]?[0-9]*[,.].?[0-9] m"
  pt49 <- "Fuste= [-+]?[0-9]*[,.].?[0-9] m"
  pt50 <- "Fuste= [-+]?[0-9]*[,.].?[0-9]m"
  pt51 <- "com [-+]?[0-9]*[,.].?[0-9] m. alt."
  pt52 <- "Height: [-+]?[0-9]*[,.].?[0-9] m"
  pt53 <- "Arbol [-+]?[0-9]*[,.].?[0-9]m"
  pt54 <- "Treelet, [-+]?[0-9]*[,.].?[0-9]m"
  pt55 <- "altura = [-+]?[0-9]*[,.].?[0-9]m"
  pt56 <- "Fuste = [-+]?[0-9]*[,.].?[0-9]m"
  pt57 <- "Fuste = [-+]?[0-9]*[,.].?[0-9] m"

  altura <- NA
  for (p in 1:57) {
    pt <- get(paste("pt", p, sep = ""))
    gp <- grep(pt, x, ignore.case = F)
    if (length(gp) > 0 & is.na(altura)) {
      # print(p)
      rmm <- strsplit(x, pt)[[1]]
      rmm <- rmm[rmm != "" & rmm != "." & !is.na(rmm)]
      xx <- x
      if (length(rmm) > 0) {
        for (r in length(rmm):1) {
          xx <- gsub(rmm[r], "", xx, fixed = T, useBytes = T)
        }
      }
      xx <- trimws(gsub("[A-Z]|\\(|\\)|:|=", "", xx, ignore.case = T), which = "both")
      xx <- gsub(",", ".", xx)
      tt <- grep("-", xx)
      if (length(tt) > 0) {
        xxx <- strsplit(xx, "-")[[1]]
        xxx <- xxx[xxx != ""]
        xxx <- gsub(" \\.", "", xxx)
        xx <- mean(as.numeric(trimws(xxx, which = "both")), na.rm = T)
      } else {
        xx <- strsplit(xx, " ")[[1]]
        xx <- trimws(xx, which = "both")
        xx <- xx[xx != "."]
        xx <- xx[1]
      }
      xx <- as.numeric(xx)
      if (!is.na(xx) && xx > 0) {
        altura <- xx
      } else {
        altura <- NA
      }
    }
  }
  return(altura)
}

10.3.2 DAP

pegadap <- function(x) {
  x <- gsub("\\+/-", "", x)
  x <- gsub("  ", " ", x)
  x <- gsub("  ", " ", x)
  # print(x)
  pt1 <- "x [-+]?[0-9]*[,.]?[0-9] cm de circ"
  pt2 <- "x [-+]?[0-9]*[,.]?[0-9] cm de di"
  pt3 <- "x [-+]?[0-9]*[,.]?[0-9]cm de di"
  pt4 <- "x [-+]?[0-9]*[,.]?[0-9]cm di"
  pt5 <- "m x [-+]?[0-9]*[,.]?[0-9] cm DAP"
  pt6 <- "m x [-+]?[0-9]*[,.]?[0-9] cm"
  pt7 <- "de [-+]?[0-9]*[,.]?[0-9] m de DAP"
  pt8 <- "[-+]?[0-9]*[,.]?[0-9] cm de DAP"
  pt9 <- "[-+]?[0-9]*[,.]?[0-9] cm (DAP)"
  pt10 <- "[-+]?[0-9]*[,.]?[0-9] cm dbh"
  pt11 <- "DAP [-+]?[0-9]*[,.]?[0-9] cm"
  pt12 <- "[-+]?[0-9]*[,.]?[0-9] cm D.A.P."
  pt13 <- "D.A.P. = [-+]?[0-9]*[,.]?[0-9] cm"
  pt14 <- "[-+]?[0-9]*[,.]?[0-9]cm de di"
  pt15 <- "[-+]?[0-9]*[,.]?[0-9]cm dap"
  pt16 <- "[-+]?[0-9]*[,.]?[0-9]cm. dia"
  pt17 <- "dbh. [-+]?[0-9]*[,.]?[0-9]cm"
  pt18 <- "[-+]?[0-9]*[,.]?[0-9]cm. in dia"
  pt19 <- "[-+]?[0-9]*[,.]?[0-9] cm de di"
  pt20 <- "[-+]?[0-9]*[,.]?[0-9] cm (DAP)"
  pt21 <- "DBH [-+]?[0-9]*[,.]?[0-9] cm"

  # x="Árvore de 13m de altura x 11cm de diâmetro do fuste."
  altura <- NA
  for (p in 1:21) {
    pt <- get(paste("pt", p, sep = ""))
    gp <- grep(pt, x, ignore.case = F)
    if (length(gp) == 1 & is.na(altura)) {
      # print(p)
      rmm <- strsplit(x, pt)[[1]]
      rmm <- rmm[rmm != "" & rmm != "." & !is.na(rmm)]
      xx <- x
      if (length(rmm) == 1) {
        for (r in length(rmm):1) {
          xx <- gsub(rmm[r], "", xx, fixed = T, useBytes = T)
        }
      } else {
        if (length(rmm) == 2) {
          n1 <- nchar(rmm[1])
          n2 <- nchar(rmm[2])
          n0 <- nchar(x)
          ns <- n1 + 1
          nt <- n0 - n2
          xx <- substr(x, ns, nt)
        } else {
          if (length(rmm) > 2) {
            xx <- NA
          }
        }
      }
      if (!is.na(xx)) {
        xx <- trimws(gsub("[A-Z]|\\(|\\)|:|=", "", xx, ignore.case = T), which = "both")
        xx <- gsub(",", ".", xx)
        tt <- grep("-", xx)
        if (length(tt) > 0) {
          xxx <- strsplit(xx, "-")[[1]]
          xxx <- xxx[xxx != ""]
          xxx <- gsub(" \\.", "", xxx)
          xx <- mean(as.numeric(trimws(xxx, which = "both")), na.rm = T)
        } else {
          xx <- strsplit(xx, " ")[[1]]
          xx <- trimws(xx, which = "both")
          xx <- xx[xx != "." & xx != "" & xx != "..."]
          xx <- xx[1]
          if (substr(xx, nchar(xx), nchar(xx)) == ".") {
            xx <- substr(xx, 1, nchar(xx) - 1)
          }
        }
        xx <- as.numeric(xx)
        if (xx > 0) {
          altura <- xx
        }
      }
    }
  }
  return(altura)
}