Common¶
These objects provide access to our schema through the Python Cap’n Proto implementation.
-
FrameContainer¶ A FrameContainer is the root of our data container. It represents a part of a video that usually covers about 20 minutes. Each FrameContainer only holds the frames of one camera.
# Corresponds to a video
struct FrameContainer {
  id @0 :UInt64;                           # global unique id of the frame container
  dataSources @1 :List(DataSource);        # list of data sources (videos / images)
  fromTimestamp @2 :Float64;               # unix timestamp of the first frame
  toTimestamp @3 :Float64;                 # unix timestamp of the last frame
  frames @4 :List(Frame);                  # frames must be sorted in the order they were recorded.
  camId @5 :UInt16;                        # the cam number
  hiveId @6 :UInt16;                       # the id of the hive
  transformationMatrix @7 :List(Float32);  # the transformation matrix from image coordinates to hive coordinates.
                                           # The matrix is of dimension 4x4 and stored this way
                                           #   1 | 2 | 3 | 4
                                           #   5 | 6
                                           #        ...
                                           #             15| 16
}
-
Frame¶ A Frame holds all the information about a single image in a video.
# Corresponds to an image in the video.
struct Frame {
  id @0 :UInt64;               # global unique id of the frame
  dataSourceIdx @1 :UInt32;    # the frame is from this data source
  frameIdx @6 :UInt32;         # sequential increasing index for every data source.
  timestamp @2 :Float64;       # unix time stamp of the frame
  timedelta @3 :UInt32;        # time difference between this frame and the frame before in microseconds
  detectionsUnion :union {
    detectionsCVP @4 :List(DetectionCVP);      # detections format of the old computer vision pipeline
    detectionsDP @5 :List(DetectionDP);        # detections format of the new deeppipeline
    detectionsTruth @7 :List(DetectionTruth);  # detections format of ground truth data
  }
}
-
DataSource¶ This is a part of a
FrameContainer and references the original video file.

struct DataSource {
  idx @0 :UInt32;                 # the index of the data source
  filename @1 :Text;              # filename of the data source
  videoPreviewFilename @2 :Text;  # (optional) filename of the preview video
}
-
DetectionCVP¶ This is the detection format of the old Computer Vision Pipeline. It was replaced with
DetectionDP in the summer of 2015.

struct DetectionCVP {
  idx @0 :UInt16;           # sequential index of the detection, counted from 0 for every frame
                            # the combination (idx, Frame.id) is a global key
  candidateIdx @1 :UInt16;  # sequential index of the candidate per tag
  gridIdx @2 :UInt16;       # sequential index of the grid/decoding per candidate
  xpos @3 :UInt16;          # x coordinate of the grid center
  ypos @4 :UInt16;          # y coordinate of the grid center
  xposHive @5 :UInt16;      # x coordinate of the grid center wrt. the hive
  yposHive @6 :UInt16;      # y coordinate of the grid center wrt. the hive
  zRotation @7 :Float32;    # rotation of the grid in z plane
  yRotation @8 :Float32;    # rotation of the grid in y plane
  xRotation @9 :Float32;    # rotation of the grid in x plane
  lScore @10 :Float32;      # roi score (Localizer)
  eScore @11 :UInt16;       # ellipse score (EllipseFitter)
  gScore @12 :Float32;      # grid score (GridFitter)
  decodedId @13 :UInt32;    # decoded id
}
-
DetectionDP¶ This is the new format for a detection that replaced
DetectionCVP.

struct DetectionDP {
  idx @0 :UInt16;                # sequential index of the detection, counted from 0 for every frame
                                 # the combination (idx, Frame.id) is a global key
  xpos @1 :UInt16;               # x coordinate of the grid center wrt. the image
  ypos @2 :UInt16;               # y coordinate of the grid center wrt. the image
  xposHive @3 :UInt16;           # x coordinate of the grid center wrt. the hive
  yposHive @4 :UInt16;           # y coordinate of the grid center wrt. the hive
  zRotation @5 :Float32;         # rotation of the grid in z plane
  yRotation @6 :Float32;         # rotation of the grid in y plane
  xRotation @7 :Float32;         # rotation of the grid in x plane
  radius @8 :Float32;            # radius of the tag
  localizerSaliency @9 :Float32; # saliency of the localizer network
  decodedId @10 :List(UInt8);    # the decoded id, the bit probabilities are discretised to 0-255.
                                 # p(first bit == 1) = decodedId[0] / 255
  descriptor @11 :List(UInt8);   # visual descriptor of the detection. ordered from most
                                 # significant eight bits to least significant eight bits.
}
-
DetectionTruth¶ This is the format for manually created ground-truth data, which might be generated via the Editor GUI.
struct DetectionTruth {
  idx @0 :UInt16;         # sequential index of the detection, counted from 0 for every frame
                          # the combination (idx, Frame.id) is a global key
  xpos @1 :UInt16;        # x coordinate of the grid center wrt. the image
  ypos @2 :UInt16;        # y coordinate of the grid center wrt. the image
  xposHive @3 :UInt16;    # x coordinate of the grid center wrt. the hive
  yposHive @4 :UInt16;    # y coordinate of the grid center wrt. the hive
  decodedId @5 :Int32;    # decoded id by human
  readability @6 :Grade;  # tags might be visible or (partially) obscured

  enum Grade {            # ranks for evaluation of a tag's readability are:
    unknown @0;           #  - not considered or evaluated
    completely @1;        #  - completely visible **and** human readable
    partially @2;         #  - only partially visible and therefore **not** human readable
    none @3;              #  - **not** visible at all
  }
}