-
Notifications
You must be signed in to change notification settings - Fork 5
Open
Description
There are two options for anonymizing: deleting the data or overwriting it. Consult the wiki and the Python version.
Toy demo of how to modify an XML document in R:
library("XML")
library("magrittr")
# Stand-in for an ITS file: a tiny XML document held in a string
tree <- '
<xml_sucks>
<xml_is_stupid amen="TRUE"/>
<xml_is_lame lameness="1000">yes is it </xml_is_lame>
<xml_is_lame lameness="1001">what am i even doing </xml_is_lame>
</xml_sucks>
'
# Parse it. asText = TRUE states explicitly that the argument is XML content
# rather than a file name, instead of leaving that for xmlParse() to infer
# (the string starts with a newline, not "<").
doc <- xmlParse(tree, asText = TRUE)
# Nuclear option: strip every attribute from each node matched by an XPath.
#   doc  - parsed XML document to search
#   path - XPath expression selecting the nodes to strip
# Returns a list with one element per matched node (the stripped node).
remove_attributes_in_xpath <- function(doc, path) {
  matched_nodes <- getNodeSet(doc, path)
  # Calling removeAttributes() with no attribute names clears all of them
  lapply(matched_nodes, removeAttributes)
}
# Set attributes on each node matched by an XPath.
#   doc  - parsed XML document to search
#   path - XPath expression selecting the nodes to edit
#   ...  - name = value pairs: attribute name and the value to give it
# Returns a list with one element per matched node (the edited node).
set_attribute_in_xpath <- function(doc, path, ...) {
  # Collapse the dots into a single named atomic vector of new values
  new_values <- unlist(list(...))
  matched_nodes <- getNodeSet(doc, path)
  # Apply the same set of edits to every matching node
  lapply(matched_nodes, function(target) {
    xmlAttrs(target) <- new_values
    target
  })
}
# Drop all attributes from every <xml_is_stupid> node; the returned list of
# edited nodes is printed below
remove_attributes_in_xpath(doc, "//xml_is_stupid")
#> [[1]]
#> <xml_is_stupid/>
# Overwrite the existing lameness attribute and add a new verbosity attribute
# on every <xml_is_lame> node
set_attribute_in_xpath(doc, "//xml_is_lame", lameness = 2000, verbosity = 10)
#> [[1]]
#> <xml_is_lame lameness="2000" verbosity="10">yes is it </xml_is_lame>
#>
#> [[2]]
#> <xml_is_lame lameness="2000" verbosity="10">what am i even doing </xml_is_lame>
# Look at the updated tree: both calls above changed doc itself, even though
# their results were never assigned back
doc
#> <?xml version="1.0"?>
#> <xml_sucks>
#> <xml_is_stupid/>
#> <xml_is_lame lameness="2000" verbosity="10">yes is it </xml_is_lame>
#> <xml_is_lame lameness="2000" verbosity="10">what am i even doing </xml_is_lame>
#> </xml_sucks>
#> Metadata
Metadata
Assignees
Labels
No labels