mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-31 03:53:50 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			200 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/perl
 | |
| 
 | |
| # usage:
 | |
| # git clone --bare git@git.zulip.net:eng/zulip.git
 | |
| # cd zulip.git
 | |
| # git fast-export --export-marks=../zulip.em --progress=1000 --all > ../zulip.fe
 | |
| # git init --bare ../zulip-zanitized.git
 | |
| # cd ../zulip-zanitized.git
 | |
| # zanitizer ../zulip.fe ../zulip.em | git fast-import --quiet
 | |
| 
 | |
| use strict;
 | |
| use warnings;
 | |
| 
 | |
| use Digest::SHA qw(sha1_hex);
 | |
| use FindBin;
 | |
| 
 | |
| use lib $FindBin::Bin;
 | |
| use zanitizer_config;
 | |
| 
 | |
# Compare two trees for equality.
#
# Both arguments are hash references.  The trees are equal when they
# contain exactly the same keys and every value compares equal as a
# string.  Returns a true value when they are equal, a false value
# otherwise.
sub eq_tree {

    # Deliberately not named $a/$b: lexicals with those names shadow the
    # package variables that sort and friends rely on.
    my ( $left, $right ) = @_;

    # Hashes with different key counts can never be equal.  Once the
    # counts match, checking every key of $left against $right is
    # sufficient: $right cannot contain an extra key.
    return !!0 if keys %$left != keys %$right;

    for my $key ( keys %$left ) {
        return !!0
          if !exists $right->{$key} || $left->{$key} ne $right->{$key};
    }
    return !!1;
}
 | |
| 
 | |
# Command-line arguments: the fast-export stream to rewrite and,
# optionally, the marks file produced alongside it (see the usage
# comment at the top of this file).
my ( $fast_export_file, $export_marks_file ) = @ARGV;
defined $fast_export_file
  or die "usage: $0 FAST_EXPORT_FILE [EXPORT_MARKS_FILE]\n";

# Maps the first whitespace-separated field of each marks-file line to
# the second one.  Stays empty when no marks file is given.
my %export_marks = ();
if ( defined $export_marks_file ) {
    open my $marks_fh, '<', $export_marks_file
      or die "cannot open $export_marks_file: $!";
    while ( my $line = <$marks_fh> ) {
        my ( $mark, $id, @extra ) = split ' ', $line;

        # Blank lines carry no entry.
        next if !defined $mark;

        # Anything other than exactly two fields would previously have
        # shifted every following key/value pair; refuse it instead.
        die "malformed line " . $. . " in $export_marks_file: $line"
          if !defined $id || @extra;

        $export_marks{$mark} = $id;
    }
    close $marks_fh;
}
 | |
| 
 | |
# Bookkeeping shared by the stream-processing loop below.

# Mark seen in the input => mark to reference in the output.  Differs
# from the key when a blob is a duplicate of an earlier one or when a
# commit is dropped in favour of its base.
my %mark_map;

# SHA-1 (hex) of blob contents after scrubbing => mark of the first
# blob written with those contents.
my %blob_mark;

# Ref name => output mark of the commit the ref currently points at.
my %ref_commit;

# Output commit mark => hash ref of file name => "mode mark".
my %commit_tree;

# Marks of blobs whose contents were changed by scrubbing.
my %scrubbed_blob;

# File names (after scrubbing) that referenced a scrubbed blob.
my %scrubbed_file;

# Original file names that scrubbing removed entirely.
my %deleted_file;

# Original file name => scrubbed file name, when they differ.
my %renamed_file;
 | |
| 
 | |
# Read the fast-export stream command by command, scrub blob contents,
# commit messages and file names, and write the rewritten stream to
# stdout.  Handles the "blob", "reset", "commit" and "progress"
# commands; anything else is fatal.
open my $fast_export, '<', $fast_export_file
  or die "cannot open $fast_export_file: $!";

# Read the next line, match it against $pattern and return the first
# capture group.  Dies with context on end of stream or on a mismatch.
my $expect_line = sub {
    my ( $pattern, $what ) = @_;
    my $line = <$fast_export>;
    defined $line or die "unexpected end of stream: expected $what";
    my ($value) = $line =~ $pattern
      or die "expected $what, got: $line";
    return $value;
};

# Read exactly $len bytes of payload; $what names the payload for the
# error message.
my $read_data = sub {
    my ( $len, $what ) = @_;
    my $got = read( $fast_export, my $data, $len );
    defined $got && $got == $len or die "short read of $what";
    return $data;
};

# $_ always holds the command line currently being processed; the
# scrub_* helpers also take their input from, and leave their result
# in, $_.
$_ = <$fast_export>;
while ( defined $_ ) {
    if ( $_ eq "blob\n" ) {
        my $mark = $expect_line->( qr/^mark (\S*)\n$/s, 'blob mark' );
        my $len  = $expect_line->( qr/^data (\d+)\n$/s, 'blob data header' );
        my $data = $read_data->( $len, "blob $mark" );

        $_ = $data;
        scrub_text();
        if ( $_ ne $data ) {
            $scrubbed_blob{$mark} = 1;
            $data = $_;
        }

        my $terminator = <$fast_export>;
        ( defined $terminator && $terminator eq "\n" )
          or die "missing newline after blob $mark";

        # Emit each distinct (scrubbed) content only once; later
        # duplicates are redirected to the first mark.
        my $hash = sha1_hex($data);
        if ( exists $blob_mark{$hash} ) {
            $mark_map{$mark} = $blob_mark{$hash};
        }
        else {
            $blob_mark{$hash} = $mark_map{$mark} = $mark;
            print "blob\nmark $mark\ndata ", length($data), "\n", $data, "\n";
        }
    }
    elsif (/^reset (?'ref'.*)\n$/s) {
        my $ref  = $+{ref};
        my $from = undef;

        $_ = <$fast_export>;
        while ( defined $_ ) {
            if ( $_ eq "\n" ) {
                $_ = <$fast_export>;
                last;
            }
            elsif (/^from (?'from'.*)\n$/s) {
                $from = $+{from};
            }
            else {
                # The trailing LF on reset is optional
                last;
            }
            $_ = <$fast_export>;
        }

        # Record where the ref now points so that a later commit on it
        # without an explicit "from" is based on the right tree.
        my $target = defined $from ? $mark_map{$from} : undef;
        $ref_commit{$ref} = $target;

        print "reset $ref\n";
        print "from $target\n" if defined $target;
        print "\n";

        # $_ already holds the next command.
        next;
    }
    elsif (/^commit (?'ref'.*)\n$/s) {
        my $ref       = $+{ref};
        my $mark      = $expect_line->( qr/^mark (\S*)\n$/s, 'commit mark' );
        my $author    = $expect_line->( qr/^author (.*)\n$/s, 'author' );
        my $committer = $expect_line->( qr/^committer (.*)\n$/s, 'committer' );
        my $len  = $expect_line->( qr/^data (\d+)\n$/s, 'commit data header' );
        my $data = $read_data->( $len, "message of commit $mark" );

        $_ = <$fast_export>;
        my $from = undef;
        if ( defined $_ && /^from (?'from'.*)\n$/s ) {
            $from = $+{from};
            $_    = <$fast_export>;
        }

        # Without an explicit parent the commit continues from whatever
        # the ref currently points at.
        my $base = defined $from ? $mark_map{$from} : $ref_commit{$ref};

        my @merge = ();
        while ( defined $_ && /^merge (?'mark'\S*)\n$/s ) {
            die "unimplemented case" if !defined $from;
            push @merge, $+{mark};
            $_ = <$fast_export>;
        }

        # git fast-export incorrectly writes M before D when replacing
        # a symlink with a directory.  We move every D before every M
        # to work around this bug.
        my @delete = ();
        my @modify = ();
        while (1) {
            defined $_ or die "unexpected end of stream in commit $mark";
            if ( $_ eq "\n" ) {
                last;
            }
            elsif (/^D (?'file'.*)\n$/s) {
                $_ = $+{file};
                scrub_filename();

                # An undefined result means the file is dropped
                # altogether, so there is nothing to delete.
                push @delete, { file => $_ } if defined $_;
            }
            elsif (/^M (?'mode'\d+) (?'mark'\S+) (?'file'.*)\n$/s) {
                my $mode = $+{mode};
                my $blob = $+{mark};
                my $path = $+{file};

                $_ = $path;
                scrub_filename();
                if ( defined $_ ) {
                    $renamed_file{$path} = $_ if $_ ne $path;
                    $scrubbed_file{$_}   = 1  if exists $scrubbed_blob{$blob};
                    push @modify, { mode => $mode, mark => $blob, file => $_ };
                }
                else {
                    $deleted_file{$path} = 1;
                }
            }
            else {
                die "unhandled command in commit: $_";
            }
            $_ = <$fast_export>;
        }

        # Build the resulting tree to find out whether this commit
        # still changes anything after scrubbing.
        my $base_tree = defined $base ? $commit_tree{$base} : {};
        my %tree      = %$base_tree;
        delete $tree{ $$_{file} } for @delete;
        $tree{ $$_{file} } = "$$_{mode} $mark_map{$$_{mark}}" for @modify;

        my $live_merges = grep { defined $mark_map{$_} } @merge;
        if ( eq_tree( \%tree, $base_tree ) && !$live_merges ) {

            # Nothing left of this commit: drop it and point its mark
            # (and the ref) at its base instead.
            $ref_commit{$ref} = $mark_map{$mark} = $base;
        }
        else {
            $ref_commit{$ref}   = $mark_map{$mark} = $mark;
            $commit_tree{$mark} = \%tree;

            $_ = $data;
            scrub_text();
            if ( exists $export_marks{$mark} ) {

                # Separate the provenance note from the message body
                # by a blank line.
                $_ .= "\n" until /\n\n$/;
                $_ .= "(imported from commit $export_marks{$mark})\n";
            }
            print
"commit $ref\nmark $mark\nauthor $author\ncommitter $committer\ndata ",
              length($_), "\n", $_;
            if ( defined $from ) {
                die "unimplemented case" if !defined $mark_map{$from};
                print "from $mark_map{$from}\n";
            }
            for (@merge) {
                print "merge $mark_map{$_}\n" if defined $mark_map{$_};
            }
            print "D $$_{file}\n"                                for @delete;
            print "M $$_{mode} $mark_map{$$_{mark}} $$_{file}\n" for @modify;
            print "\n";
        }
    }
    elsif (/^progress /) {
        print $_;
    }
    else {
        die "unhandled command: $_";
    }
    $_ = <$fast_export>;
}
close $fast_export;
 | |
| 
 | |
# Summarise on stderr what the scrubbing did, one sorted list per
# category, each entry indented by two spaces under its heading.
print STDERR "Deleted files:\n", map { "  $_\n" } sort keys %deleted_file;
print STDERR "Renamed files:\n",
  map { "  $_ => $renamed_file{$_}\n" } sort keys %renamed_file;
print STDERR "Scrubbed files:\n", map { "  $_\n" } sort keys %scrubbed_file;
 |