Darren Mothersele

Software Developer

Warning: You are viewing old, legacy content. Kept for posterity. Information is out of date. Code samples probably don't work. My opinions have probably changed. Browse at your own risk.

Migrating Node Revisions to Drupal 7

Jul 16, 2012

web-dev

A while ago I was working on a migration from Open Atrium (Drupal 6) to Drupal 7. I decided to use the Migrate module for this, as it provides a great framework for building migrations. Unfortunately, the Migrate module doesn't (at the moment) support migrating node revisions. There's discussion on the issue queue about migrating revisions here. I quickly hacked together some code that worked for me. I'm going to assume some prior knowledge of the Migrate module. If it's new to you, check out this presentation from DrupalCon Denver, which gives a good overview of the architecture.

The code below is working for me, and I'm using it to migrate content from Open Atrium to Drupal 7. The code does need some work, as it throws a few warning messages. It still migrates all nodes and revisions successfully, so it's usable if you're not a pedant :).

Migration using the Drupal Migrate module requires you to write some code. You can usually get away with using the provided Migration sources and destination handlers, and just write the code that handles the mapping of fields from source to target. In this case, I couldn't as the project required all history (revisions) to be migrated, and the standard MigrateDestinationNode class doesn't handle this. To get around this I first create a basic extension of the Node destination class to deal with revisions...

<?php
/**
 * Node destination whose migration map is keyed on the revision ID (vid).
 *
 * Builds on MigrateDestinationNode: it reports 'vid' as the destination key,
 * advertises 'vid' as a mappable field, and rolls back by deleting the
 * individual revisions that were imported.
 */
class MigrateDestinationRevision extends MigrateDestinationNode {

  /**
   * Describes the destination key column stored in the migration map.
   *
   * @return array
   *   Schema array containing a single unsigned integer 'vid' column.
   */
  public static function getKeySchema() {
    $vid_column = array(
      'type' => 'int',
      'unsigned' => TRUE,
      'description' => 'ID of destination node revision',
    );
    return array('vid' => $vid_column);
  }

  /**
   * Lists the destination fields, adding 'vid' to the parent's list.
   *
   * @param Migration $migration
   *   Optional migration, passed straight through to the parent.
   *
   * @return array
   *   Field descriptions keyed by field name.
   */
  public function fields($migration = NULL) {
    $available = parent::fields($migration);
    $doc_link = array('@doc' => 'http://drupal.org/node/1298724');
    $available['vid'] = t('Node: <a href="@doc">Revision (vid)</a>', $doc_link);
    return $available;
  }

  /**
   * Deletes a batch of previously imported node revisions.
   *
   * For every vid that still loads, the node_revision row is removed, then
   * hook_node_revision_delete() is invoked and the revision's field data is
   * deleted. IDs that no longer load are skipped.
   *
   * @param array $vids
   *   Revision IDs to remove.
   */
  public function bulkRollback(array $vids) {
    migrate_instrument_start('revision_delete_multiple');
    $this->prepareRollback($vids);

    foreach ($vids as $vid) {
      // node_load() with a NULL nid and a vid loads that specific revision.
      $revision = node_load(NULL, $vid);
      if (!$revision) {
        continue;
      }

      $delete = db_delete('node_revision');
      $delete->condition('vid', $revision->vid);
      $delete->execute();

      module_invoke_all('node_revision_delete', $revision);
      field_attach_delete_revision('node', $revision);
    }

    $this->completeRollback($vids);
    migrate_instrument_stop('revision_delete_multiple');
  }

}
?>

I'm overriding the key schema to make it aware of vid revision IDs, and adding some code to take care of deleting all node revisions when I want to roll back a migration.

I then create the node migration, but instead of migrating the latest versions of the nodes, I craft the SQL so that it migrates all nodes, but does so with their first ever revisions. This migration runs before the revision migration: I create the revision migration and make the node migration a dependency of it. Here are the basics of the node migration, with the detail removed. Notice that I'm using an abstract class; I have other concrete classes that instantiate the migration with an actual content type. This allows me to reuse as much mapping code as possible...

<?php
/**
 * Base migration for Open Atrium (Drupal 6) nodes of one content type.
 *
 * Each node is imported with the body, teaser and log of its FIRST revision
 * (lowest vid), so that a separate revision migration can replay the history.
 *
 * The constructor only prepares the queries, destination and field mappings;
 * it does not assign a source object.
 * NOTE(review): presumably concrete subclasses build the source from $query
 * and $count_query -- confirm against the subclasses.
 */
abstract class AtriumNodeMigration extends Migration {

  /**
   * Main select query returning one row per node (first revision).
   */
  protected $query;

  /**
   * Subquery yielding MIN(vid) per node of the source type.
   */
  protected $subquery;

  /**
   * Query returning the number of rows $query selects, as 'cnt'.
   */
  protected $count_query;

  /**
   * Subquery aggregating uploaded file IDs per revision.
   *
   * Declared explicitly so it is not created as a dynamic property.
   */
  protected $file_query;

  /**
   * Builds the source queries, map, destination and field mappings.
   *
   * @param string $source_type
   *   Node type to select from the 'atrium' database connection.
   * @param string $dest_type
   *   Node type handed to MigrateDestinationNode.
   */
  public function __construct($source_type = '', $dest_type = '') {
    parent::__construct();

    $this->description = "Migration of {$source_type} nodes from atrium database";
    $this->dependencies = array('AtriumUser', 'AtriumFiles');

    $source_key = array(
      'nid' => array(
        'type' => 'int',
        'not null' => TRUE,
        'description' => 'Source node ID',
      ),
    );
    $this->map = new MigrateSQLMap($this->machineName, $source_key, MigrateDestinationNode::getKeySchema());

    // Subquery used to select the first revision when importing nodes.
    $this->subquery = Database::getConnection('default', 'atrium')
      ->select('node', 'n');
    $this->subquery->join('node_revisions', 'r', 'n.nid = r.nid');
    $this->subquery->condition('n.type', $source_type, '=');
    $this->subquery->groupBy('r.nid');
    $this->subquery->addExpression('MIN(r.vid)', 'vid');

    // Subquery to select file attachments (from uploads): one row per
    // revision with its file IDs collapsed into a comma-separated list.
    $this->file_query = Database::getConnection('default', 'atrium')
      ->select('upload', 'u');
    $this->file_query->fields('u', array('vid'));
    $this->file_query->addExpression("GROUP_CONCAT(u.fid SEPARATOR ',')", 'fids');
    $this->file_query->groupBy('u.vid');

    $this->query = Database::getConnection('default', 'atrium')
      ->select('node', 'n');
    $this->query->join('node_revisions', 'r', 'n.nid = r.nid');
    // LEFT JOIN so that nodes without a URL alias are still migrated; an
    // inner join would silently drop them.
    $this->query->leftJoin('url_alias', 'a', 'a.src = CONCAT(\'node/\', n.nid)');
    // Restrict the revision rows to each node's first revision.
    $this->query->join($this->subquery, 'minrev', 'r.vid = minrev.vid');
    $this->query->condition('n.type', $source_type, '=');
    $this->query->leftJoin($this->file_query, 'fids', 'fids.vid = r.vid');
    $this->query->fields('fids', array('fids'));
    $this->query->fields('n', array('nid', 'title', 'uid', 'status', 'created', 'changed', 'comment', 'promote', 'sticky'));
    $this->query->fields('r', array('body', 'teaser', 'log'));
    $this->query->fields('a', array('dst'));

    // The count query repeats the joins of the main query so that both
    // describe the same row set.
    $this->count_query = Database::getConnection('default', 'atrium')
      ->select('node', 'n');
    $this->count_query->join('node_revisions', 'r', 'n.nid = r.nid');
    $this->count_query->leftJoin('url_alias', 'a', 'a.src = CONCAT(\'node/\', n.nid)');
    $this->count_query->join($this->subquery, 'minrev', 'r.vid = minrev.vid');
    $this->count_query->condition('n.type', $source_type, '=');
    $this->count_query->leftJoin($this->file_query, 'fids', 'fids.vid = r.vid');
    $this->count_query->addExpression('COUNT(r.nid)', 'cnt');

    $this->destination = new MigrateDestinationNode($dest_type, array('text_format' => 'markdown'));

    // Authorship: translate source uids through the user migration and fall
    // back to uid 1.
    $this->addFieldMapping('uid', 'uid')
      ->sourceMigration('AtriumUser')
      ->defaultValue(1);
    $this->addFieldMapping('revision_uid', 'uid')
      ->sourceMigration('AtriumUser')
      ->defaultValue(1);

    $this->addFieldMapping('pathauto')
      ->defaultValue(0);
    $this->addFieldMapping('revision')
      ->defaultValue(0);

    $this->addFieldMapping('body', 'body');
    $this->addFieldMapping('body:format')
      ->defaultValue('markdown');
    $this->addFieldMapping('body:summary', 'teaser');

    // Attachments: the comma-separated source fids are translated through
    // the file migration.
    $this->addFieldMapping('field_files', 'fids')
      ->sourceMigration('AtriumFiles');
    $this->addFieldMapping('field_files:file_class')
      ->defaultValue('MigrateFileFid');

    $this->addFieldMapping('path', 'dst');

    $this->addSimpleMappings(array('log', 'sticky', 'promote', 'status', 'created', 'changed', 'comment', 'title'));

    $this->addUnmigratedDestinations(array(
      'language',
      'is_new',
      'tnid',
      'body:language',
      'field_files:language',
      'field_files:description',
      'field_files:display',
    ));
  }

}
?>

There's some code in the example above to construct the SQL and select the first revision of nodes. This is important, as the revision migration will migrate the actual node bodies for the whole revision history.

<?php
/**
 * Base migration for the revisions of Open Atrium (Drupal 6) nodes.
 *
 * Every row of node_revisions for the given source type is imported as a new
 * revision of the node that the node migration named by $nid_map created.
 *
 * The constructor only prepares the queries, destination and field mappings;
 * it does not assign a source object.
 * NOTE(review): presumably concrete subclasses build the source from $query
 * and $count_query -- confirm against the subclasses.
 */
abstract class AtriumRevisionMigration extends Migration {

  /**
   * Main select query returning one row per source revision.
   */
  protected $query;

  /**
   * Query returning the number of source revisions, as 'cnt'.
   */
  protected $count_query;

  /**
   * Subquery aggregating uploaded file IDs per revision.
   *
   * Declared explicitly so it is not created as a dynamic property.
   */
  protected $file_query;

  /**
   * Builds the source queries, map, destination and field mappings.
   *
   * @param string $source_type
   *   Node type to select from the 'atrium' database connection.
   * @param string $dest_type
   *   Node type handed to MigrateDestinationRevision.
   * @param string $nid_map
   *   Machine name of the node migration; added as a dependency and used to
   *   translate source nids to destination nids.
   */
  public function __construct($source_type = '', $dest_type = '', $nid_map = '') {
    parent::__construct();

    $this->description = "Migration of {$source_type} revisions from atrium database";
    $this->dependencies = array('AtriumUser', 'AtriumFiles', $nid_map);

    $source_key = array(
      'vid' => array(
        'type' => 'int',
        'not null' => TRUE,
        'description' => 'Source revision ID',
      ),
    );
    $this->map = new MigrateSQLMap($this->machineName, $source_key, MigrateDestinationRevision::getKeySchema());

    // Subquery to select file attachments (from uploads): one row per
    // revision with its file IDs collapsed into a comma-separated list.
    $this->file_query = Database::getConnection('default', 'atrium')
      ->select('upload', 'u');
    $this->file_query->fields('u', array('vid'));
    $this->file_query->addExpression("GROUP_CONCAT(u.fid SEPARATOR ',')", 'fids');
    $this->file_query->groupBy('u.vid');

    $this->query = Database::getConnection('default', 'atrium')
      ->select('node', 'n');
    $this->query->join('node_revisions', 'r', 'n.nid = r.nid');
    // LEFT JOIN so that revisions of nodes without a URL alias are still
    // migrated; an inner join would silently drop them.
    $this->query->leftJoin('url_alias', 'a', 'a.src = CONCAT(\'node/\', n.nid)');
    $this->query->condition('n.type', $source_type, '=');
    $this->query->fields('n', array('nid', 'title', 'uid', 'status', 'created', 'comment', 'promote', 'sticky'));
    $this->query->fields('r', array('body', 'vid', 'teaser', 'log', 'timestamp'));
    $this->query->fields('a', array('dst'));
    $this->query->leftJoin($this->file_query, 'fids', 'fids.vid = r.vid');
    $this->query->fields('fids', array('fids'));

    $this->count_query = Database::getConnection('default', 'atrium')
      ->select('node_revisions', 'r');
    $this->count_query->join('node', 'n', 'n.nid = r.nid');
    // Same LEFT JOIN as the main query so both describe the same row set.
    $this->count_query->leftJoin('url_alias', 'a', 'a.src = CONCAT(\'node/\', n.nid)');
    $this->count_query->condition('n.type', $source_type, '=');
    $this->count_query->addExpression('COUNT(r.vid)', 'cnt');

    $this->destination = new MigrateDestinationRevision($dest_type, array('text_format' => 'markdown'));

    // Attach each revision to the node created by the node migration.
    $this->addFieldMapping('nid', 'nid')
      ->sourceMigration($nid_map);

    // Attachments: the comma-separated source fids are translated through
    // the file migration.
    $this->addFieldMapping('field_files', 'fids')
      ->sourceMigration('AtriumFiles');
    $this->addFieldMapping('field_files:file_class')
      ->defaultValue('MigrateFileFid');

    // Authorship: translate source uids through the user migration and fall
    // back to uid 1.
    $this->addFieldMapping('uid', 'uid')
      ->sourceMigration('AtriumUser')
      ->defaultValue(1);
    $this->addFieldMapping('revision_uid', 'uid')
      ->sourceMigration('AtriumUser')
      ->defaultValue(1);

    // The revision's own timestamp becomes the node's 'changed' value;
    // complete() later copies it into node_revision.
    $this->addFieldMapping('changed', 'timestamp');

    $this->addFieldMapping('body', 'body');
    $this->addFieldMapping('body:format')
      ->defaultValue('markdown');
    $this->addFieldMapping('body:summary', 'teaser');

    $this->addFieldMapping('pathauto')
      ->defaultValue(0);
    $this->addFieldMapping('revision')
      ->defaultValue(1);

    $this->addFieldMapping('path', 'dst');

    $this->addSimpleMappings(array('log', 'sticky', 'promote', 'status', 'created', 'comment', 'title'));

    $this->addUnmigratedDestinations(array(
      'language',
      'is_new',
      'tnid',
      'body:language',
      'field_files:language',
      'field_files:description',
      'field_files:display',
    ));
  }

  /**
   * Fixes uid and timestamp in node_revision after a revision is saved.
   *
   * Writes the node's 'changed' and 'uid' values onto the node_revision row
   * matching the node's vid.
   *
   * @param object $node
   *   The saved node; its vid, changed and uid properties are read.
   * @param object $row
   *   The source row (unused).
   */
  public function complete($node, $row) {
    db_update('node_revision')
      ->fields(array(
        'timestamp' => $node->changed,
        'uid' => $node->uid,
      ))
      ->condition('vid', $node->vid)
      ->execute();
  }

}
?>