# LiveJournal DROP TABLE IF EXISTS domain; CREATE TABLE domain ( dmid SMALLINT UNSIGNED NOT NULL, PRIMARY KEY (dmid), namespace VARCHAR(255), UNIQUE (namespace) # LiveJournal:, FotoBilder: ); # classes are tied to domains. domains can have classes of items # with different mindevcounts. # # a minimum devcount is the number of copies the system tries to # maintain for files in that class # # unspecified classname means classid=0 (implicit class), and that # implies mindevcount=2 # DROP TABLE IF EXISTS class; CREATE TABLE class ( dmid SMALLINT UNSIGNED NOT NULL, classid TINYINT UNSIGNED NOT NULL, PRIMARY KEY (dmid,classid), classname VARCHAR(50), UNIQUE (dmid,classname), mindevcount TINYINT UNSIGNED NOT NULL ); # the length field is only here for easy verifications of content # integrity when copying around. no sums or content types or other # metadata here. application can handle that. # # classid is what class of file this belongs to. for instance, on fotobilder # there will be a class for original pictures (the ones the user uploaded) # and a class for derived images (scaled down versions, thumbnails, greyscale, etc) # each domain can setup classes and assign the minimum redundancy level for # each class. fotobilder will use a 2 or 3 minimum copy redundancy for original # photos and and a 1 minimum for derived images (which means the sole device # for a derived image can die, bringing devcount to 0 for that file, but # the application can recreate it from its original) DROP TABLE IF EXISTS file; CREATE TABLE file ( fid INT UNSIGNED NOT NULL, PRIMARY KEY (fid), dmid SMALLINT UNSIGNED NOT NULL, dkey VARCHAR(255), # domain-defined. LJ: "--" UNIQUE dkey (dmid, dkey), length INT UNSIGNED, # 4GB limit classid TINYINT UNSIGNED NOT NULL, devcount TINYINT UNSIGNED NOT NULL, INDEX devcount (dmid,classid,devcount) ); DROP TABLE IF EXISTS tempfile; CREATE TABLE tempfile ( fid INT UNSIGNED NOT NULL AUTO_INCREMENT, PRIMARY KEY (fid), createtime INT UNSIGNED NOT NULL, classid TINYINT UNSIGNED NOT NULL, dmid SMALLINT UNSIGNED NOT NULL, dkey VARCHAR(255), devids VARCHAR(60) ); # files marked for death when their key is overwritten. then they get a new # fid, but since the old row (with the old fid) had to be deleted immediately, # we need a place to store the fid so an async job can delete the file from # all devices. DROP TABLE IF EXISTS file_to_delete; CREATE TABLE file_to_delete ( fid INT UNSIGNED NOT NULL, PRIMARY KEY (fid) ); # if the replicator notices that a fid has no sources, that file gets inserted # into the unreachable_fids table. it is up to the application to actually # handle fids stored in this table. DROP TABLE IF EXISTS unreachable_fids; CREATE TABLE unreachable_fids ( fid INT UNSIGNED NOT NULL, lastupdate INT UNSIGNED NOT NULL, PRIMARY KEY (fid), INDEX (lastupdate) ); # what files are on what devices? (most likely physical devices, # as logical devices of RAID arrays would be costly, and mogilefs # already handles redundancy) # # the devid index lets us answer "What files were on this now-dead disk?" # DROP TABLE IF EXISTS file_on; CREATE TABLE file_on ( fid INT UNSIGNED NOT NULL, devid MEDIUMINT UNSIGNED NOT NULL, PRIMARY KEY (fid, devid), INDEX (devid) ); # if application or framework detects an error in one of the duplicate files # for whatever reason, it can register its complaint and the framework # will do some verifications and fix things up w/ an async job # MAYBE: let application tell us the SHA1/MD5 of the file for us to check # on the other devices? DROP TABLE IF EXISTS file_on_corrupt; CREATE TABLE file_on_corrupt ( fid INT UNSIGNED NOT NULL, devid MEDIUMINT UNSIGNED NOT NULL, PRIMARY KEY (fid, devid) ); DROP TABLE IF EXISTS device; CREATE TABLE device ( devid MEDIUMINT UNSIGNED NOT NULL, PRIMARY KEY (devid), hostid MEDIUMINT UNSIGNED NOT NULL, status ENUM('alive','dead','down'), INDEX (status), mb_total MEDIUMINT UNSIGNED, mb_used MEDIUMINT UNSIGNED, mb_asof INT UNSIGNED ); DROP TABLE IF EXISTS host; CREATE TABLE host ( hostid MEDIUMINT UNSIGNED NOT NULL, PRIMARY KEY (hostid), status ENUM('alive','dead','down'), http_port MEDIUMINT UNSIGNED DEFAULT 7500, http_get_port MEDIUMINT UNSIGNED, hostname VARCHAR(40), UNIQUE (hostname), hostip VARCHAR(15), UNIQUE (hostip), altip VARCHAR(15), UNIQUE (altip), altmask VARCHAR(18), remoteroot VARCHAR(60) );