I use the open-source photo management software Digikam (along with other tools such as Gimp and DarkTable).  I obviously need very little encouragement to combine my geeky hobbies, so I quickly tried to interrogate Digikam with R. This is easy, because Digikam keeps all its image info in a SQLite database, which R has support for.

So this post shows how I did it, along with some of the output, such as the focal length of my images over time. Looks like I need a telephoto lens!  (This script and my Digikam db are on GitHub here.)

plot of chunk unnamed-chunk-2


# NOTE(review): the library(RSQLite) call appears to have been lost from the
# post; dbDriver("SQLite") below needs it, and the "Loading required package:
# DBI" message is presumably what attaching it printed -- confirm.
library(RSQLite)
## Loading required package: DBI

library(ggplot2)
library(plyr)

# Open a connection to the Digikam SQLite database file under basedir.
m <- dbDriver("SQLite")
basedir <- "/home/paul/RStudio/DigikamR/"
con <- dbConnect(m, dbname = paste0(basedir, "data/digikam4.db"))

Now we've opened the database, we can examine some of the tables within it.

# List the tables in the database
# NOTE(review): the command itself was missing from the post; dbListTables()
# is the DBI call that returns a character vector of table names like the
# output shown below -- confirm against the original script.
dbListTables(con)
##  [1] "AlbumRoots"         "Albums"             "DownloadHistory"   
##  [4] "ImageComments"      "ImageCopyright"     "ImageHaarMatrix"   
##  [7] "ImageHistory"       "ImageInformation"   "ImageMetadata"     
## [10] "ImagePositions"     "ImageProperties"    "ImageRelations"    
## [13] "ImageTagProperties" "ImageTags"          "Images"            
## [16] "Searches"           "Settings"           "TagProperties"     
## [19] "Tags"               "TagsTree"

# List the columns of some of the interesting tables.
# dbListFields() asks the database for the column names only, rather than
# reading every row of a table with dbReadTable() just to call names() on it.
dbListFields(con, "ImageInformation")
##  [1] "imageid"          "rating"           "creationDate"    
##  [4] "digitizationDate" "orientation"      "width"           
##  [7] "height"           "format"           "colorDepth"      
## [10] "colorModel"
dbListFields(con, "ImageComments")
## [1] "id"       "imageid"  "type"     "language" "author"   "date"    
## [7] "comment"
dbListFields(con, "ImageMetadata")
##  [1] "imageid"                      "make"                        
##  [3] "model"                        "lens"                        
##  [5] "aperture"                     "focalLength"                 
##  [7] "focalLength35"                "exposureTime"                
##  [9] "exposureProgram"              "exposureMode"                
## [11] "sensitivity"                  "flash"                       
## [13] "whiteBalance"                 "whiteBalanceColorTemperature"
## [15] "meteringMode"                 "subjectDistance"             
## [17] "subjectDistanceCategory"
dbListFields(con, "ImageProperties")
## [1] "imageid"  "property" "value"
dbListFields(con, "ImagePositions")
##  [1] "imageid"         "latitude"        "latitudeNumber" 
##  [4] "longitude"       "longitudeNumber" "altitude"       
##  [7] "orientation"     "tilt"            "roll"           
## [10] "accuracy"        "description"
dbListFields(con, "Images")
## [1] "id"               "album"            "name"            
## [4] "status"           "category"         "modificationDate"
## [7] "fileSize"         "uniqueHash"
dbListFields(con, "TagProperties")
## [1] "tagid"    "property" "value"

And now we can pull some of the interesting tables into data frames

# Load the image tables we want to analyse into data frames
Imgs <- dbReadTable(con, "Images")
ImgComments <- dbReadTable(con, "ImageComments")
ImgMeta <- dbReadTable(con, "ImageMetadata")
ImgInfo <- dbReadTable(con, "ImageInformation")

# Join the metadata and then the image information onto the image list,
# matching Images.id against each table's imageid column
ImgMerge <- Reduce(
  function(joined, tbl) merge(joined, tbl, by.x = "id", by.y = "imageid"),
  list(ImgMeta, ImgInfo),
  Imgs
)

# Camera make and model are converted to factors in place
ImgMerge[c("make", "model")] <- lapply(ImgMerge[c("make", "model")], as.factor)

# Factor copies of aperture, exposure time and model go into new f* columns,
# leaving the source columns untouched
ImgMerge[c("faperture", "fexposureTime", "fmodel")] <- lapply(
  ImgMerge[c("aperture", "exposureTime", "model")],
  as.factor
)

# Year ("%Y") and abbreviated month name ("%b") derived from the creation date
ImgMerge[c("Year", "Month")] <- lapply(
  c("%Y", "%b"),
  function(fmt) format(as.POSIXct(ImgMerge$creationDate), format = fmt)
)

Here are some plots

# Scatter plot: focal length against creation date, coloured by camera model,
# restricted to images whose focalLength is below 60
ggplot(
  data = subset(ImgMerge, focalLength < 60),
  mapping = aes(
    x = as.POSIXct(creationDate),
    y = focalLength,
    colour = model
  )
) +
  geom_point()

plot of chunk unnamed-chunk-2

# Histogram of focal length (bin width 5), one facet row per camera model,
# with the bar outline colour also keyed to the model
ggplot(data = ImgMerge, mapping = aes(x = focalLength)) +
  geom_histogram(
    mapping = aes(colour = as.factor(model)),
    binwidth = 5
  ) +
  facet_grid(model ~ .)

plot of chunk unnamed-chunk-2

# Scatter plot of aperture against log exposure time, coloured by camera model.
# Written with ggplot() + geom_point() because qplot() is deprecated in
# ggplot2; the aesthetic expressions are kept as they were so the axis and
# legend labels stay the same.
ggplot(
  data = ImgMerge,
  mapping = aes(
    x = as.numeric(as.character(aperture)),
    y = log(as.numeric(as.character(exposureTime))),
    colour = as.factor(model)
  )
) +
  geom_point()
## Warning: Removed 2638 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-2

# Focal-length histogram (bin width 5) for the NIKON D5000 only,
# with one facet row per year
ggplot(
  data = subset(ImgMerge, model == "NIKON D5000"),
  mapping = aes(x = focalLength)
) +
  geom_histogram(binwidth = 5) +
  facet_grid(Year ~ .)

plot of chunk unnamed-chunk-2

# Focal length against creation date for every NIKON D5000 image
ggplot(
  data = subset(ImgMerge, model == "NIKON D5000"),
  mapping = aes(x = as.POSIXct(creationDate), y = focalLength)
) +
  geom_point()
## Warning: Removed 14 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-2

# Same NIKON D5000 scatter, limited to focalLength below 60 and drawn with
# semi-transparent points so overlapping points show up as darker areas
ggplot(
  data = subset(ImgMerge, model == "NIKON D5000" & focalLength < 60),
  mapping = aes(x = as.POSIXct(creationDate), y = focalLength)
) +
  geom_point(alpha = 0.2)

plot of chunk unnamed-chunk-2