Message ID | 20221103152607.3940380-1-s.zhmylev@yadro.com |
---|---|
State | New |
Headers | show |
Series | wic: make ext2/3/4 images reproducible | expand |
On Thu, 2022-11-03 at 18:26 +0300, Sergey Zhmylev wrote: > From: Sergei Zhmylev <s.zhmylev@yadro.com> > > Ext2/3/4 FS contains not only mtime, but also ctime, atime and crtime. > Currently, all the files are being added into the rootfs image using > mkfs -d functionality which affects all the timestamps excluding mtime. > This patch ensures all the timestamps inside the FS image equal to > the SOURCE_DATE_EPOCH if it is set. > > Signed-off-by: Sergei Zhmylev <s.zhmylev@yadro.com> > --- > scripts/lib/wic/partition.py | 27 ++++++++++++++++++++++----- > 1 file changed, 22 insertions(+), 5 deletions(-) > > diff --git a/scripts/lib/wic/partition.py b/scripts/lib/wic/partition.py > index bc889bdeb9..b7965267cb 100644 > --- a/scripts/lib/wic/partition.py > +++ b/scripts/lib/wic/partition.py > @@ -294,17 +294,34 @@ class Partition(): > f.write("cd etc\n") > f.write("rm fstab\n") > f.write("write %s fstab\n" % (self.updated_fstab_path)) > - if os.getenv('SOURCE_DATE_EPOCH'): > - fstab_time = int(os.getenv('SOURCE_DATE_EPOCH')) > - for time in ["atime", "mtime", "ctime"]: > - f.write("set_inode_field fstab %s %s\n" % (time, hex(fstab_time))) > - f.write("set_inode_field fstab %s_extra 0\n" % (time)) > debugfs_cmd = "debugfs -w -f %s %s" % (debugfs_script_path, rootfs) > exec_native_cmd(debugfs_cmd, native_sysroot) > > mkfs_cmd = "fsck.%s -pvfD %s" % (self.fstype, rootfs) > exec_native_cmd(mkfs_cmd, native_sysroot, pseudo=pseudo) > > + if os.getenv('SOURCE_DATE_EPOCH'): > + sde_time = hex(int(os.getenv('SOURCE_DATE_EPOCH'))) > + debugfs_script_path = os.path.join(cr_workdir, "debugfs_script") > + files = [] > + for root, dirs, others in os.walk(rootfs_dir): > + base = root.replace(rootfs_dir, "").rstrip(os.sep) > + files += [ "/" if base == "" else base ] > + files += [ base + "/" + n for n in dirs + others ] > + with open(debugfs_script_path, "w") as f: > + f.write("set_current_time %s\n" % (sde_time)) > + f.write("set_super_value hash_seed %s\n" % (self.fsuuid)) > + for file in set(files): > + for time in ["atime", "mtime", "ctime", "crtime"]: > + f.write("set_inode_field \"%s\" %s %s\n" % (file, time, sde_time)) > + f.write("set_inode_field \"%s\" %s_extra 0\n" % (file, time)) > + for time in ["wtime", "mkfs_time", "lastcheck"]: > + f.write("set_super_value %s %s\n" % (time, sde_time)) > + for time in ["mtime", "first_error_time", "last_error_time"]: > + f.write("set_super_value %s 0\n" % (time)) > + debugfs_cmd = "debugfs -w -f %s %s" % (debugfs_script_path, rootfs) > + exec_native_cmd(debugfs_cmd, native_sysroot) > + > self.check_for_Y2038_problem(rootfs, native_sysroot) > > def prepare_rootfs_btrfs(self, rootfs, cr_workdir, oe_builddir, rootfs_dir, I don't think this is the correct behaviour as it will effectively remove all timestamp information. Many of the timestamps are ok to use and are reproducible (e.g anything from the packaging backend). There will only be recent entries like the ftab one that specifically need to be set to SDE. Some changes here are good (like the first/last error time and mkfs time and so on) but I think changing all of atime, mtime, ctime etc. is not needed. Could you see if there is a way we could only change the needed values. We could also consider a clamp approach where we only change timestamps later than SDE as that is successful in some other places. Cheers, Richard
Hi Richard, Thank you for the comment! Well, the environment described in reproducible guides currently does not provide binary reproducability due to extfs implementation. Moreover, building on some FS (for example mounted with noatime or not supporting crtime field at all like UFS) makes the extfs creation process totally not reproducible in spite of any modifications on the source files. I've checked the behaviour under multiple conditions, double checking that I've set as much reproducible stuff as possible, and it comes out that in certain circumstances rootfs files have incorrect timestamps -- they're set to the build process time. So I'm sure that if the user set SDE to a particular timestamp, we should also modify atime, ctime and crtime. P.S. overriding only the time later than SDE could have also been an option, but in reality we can not surely gather stat info for each file without debugfs or similar tools and this will tremendously degradate performance of already pretty slow image creation process. Due to this I suggest the patch I've sent. Many thanks! -- With best wishes, [cid:a4723402177076bcc35445d3d012d43099b2c9a3.camel@yadro.com-0] Sergei Zhmylev Engineering consultant OS development department В Чт, 03/11/2022 в 16:05 +0000, Richard Purdie пишет: On Thu, 2022-11-03 at 18:26 +0300, Sergey Zhmylev wrote: From: Sergei Zhmylev <s.zhmylev@yadro.com<mailto:s.zhmylev@yadro.com>> Ext2/3/4 FS contains not only mtime, but also ctime, atime and crtime. Currently, all the files are being added into the rootfs image using mkfs -d functionality which affects all the timestamps excluding mtime. This patch ensures all the timestamps inside the FS image equal to the SOURCE_DATE_EPOCH if it is set. Signed-off-by: Sergei Zhmylev <s.zhmylev@yadro.com<mailto:s.zhmylev@yadro.com>> --- scripts/lib/wic/partition.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/scripts/lib/wic/partition.py b/scripts/lib/wic/partition.py index bc889bdeb9..b7965267cb 100644 --- a/scripts/lib/wic/partition.py +++ b/scripts/lib/wic/partition.py @@ -294,17 +294,34 @@ class Partition(): f.write("cd etc\n") f.write("rm fstab\n") f.write("write %s fstab\n" % (self.updated_fstab_path)) - if os.getenv('SOURCE_DATE_EPOCH'): - fstab_time = int(os.getenv('SOURCE_DATE_EPOCH')) - for time in ["atime", "mtime", "ctime"]: - f.write("set_inode_field fstab %s %s\n" % (time, hex(fstab_time))) - f.write("set_inode_field fstab %s_extra 0\n" % (time)) debugfs_cmd = "debugfs -w -f %s %s" % (debugfs_script_path, rootfs) exec_native_cmd(debugfs_cmd, native_sysroot) mkfs_cmd = "fsck.%s -pvfD %s" % (self.fstype, rootfs) exec_native_cmd(mkfs_cmd, native_sysroot, pseudo=pseudo) + if os.getenv('SOURCE_DATE_EPOCH'): + sde_time = hex(int(os.getenv('SOURCE_DATE_EPOCH'))) + debugfs_script_path = os.path.join(cr_workdir, "debugfs_script") + files = [] + for root, dirs, others in os.walk(rootfs_dir): + base = root.replace(rootfs_dir, "").rstrip(os.sep) + files += [ "/" if base == "" else base ] + files += [ base + "/" + n for n in dirs + others ] + with open(debugfs_script_path, "w") as f: + f.write("set_current_time %s\n" % (sde_time)) + f.write("set_super_value hash_seed %s\n" % (self.fsuuid)) + for file in set(files): + for time in ["atime", "mtime", "ctime", "crtime"]: + f.write("set_inode_field \"%s\" %s %s\n" % (file, time, sde_time)) + f.write("set_inode_field \"%s\" %s_extra 0\n" % (file, time)) + for time in ["wtime", "mkfs_time", "lastcheck"]: + f.write("set_super_value %s %s\n" % (time, sde_time)) + for time in ["mtime", "first_error_time", "last_error_time"]: + f.write("set_super_value %s 0\n" % (time)) + debugfs_cmd = "debugfs -w -f %s %s" % (debugfs_script_path, rootfs) + exec_native_cmd(debugfs_cmd, native_sysroot) + self.check_for_Y2038_problem(rootfs, native_sysroot) def prepare_rootfs_btrfs(self, rootfs, cr_workdir, oe_builddir, rootfs_dir, I don't think this is the correct behaviour as it will effectively remove all timestamp information. Many of the timestamps are ok to use and are reproducible (e.g anything from the packaging backend). There will only be recent entries like the ftab one that specifically need to be set to SDE. Some changes here are good (like the first/last error time and mkfs time and so on) but I think changing all of atime, mtime, ctime etc. is not needed. Could you see if there is a way we could only change the needed values. We could also consider a clamp approach where we only change timestamps later than SDE as that is successful in some other places. Cheers, Richard
On Thu, 2022-11-03 at 18:53 +0000, Sergey Zhmylev wrote: > Hi Richard, > > Thank you for the comment! > Well, the environment described in reproducible guides currently does > not provide binary reproducability due to extfs implementation. Agreed, there is an issue here. I just don't think the patch looks quite right. > Moreover, building on some FS (for example mounted with noatime or > not supporting crtime field at all like UFS) makes the extfs creation > process totally not reproducible in spite of any modifications on the > source files. You're arguing we need to set crtime, ctime and atime which I can believe. The mtime however should be fine from the package manager so I don't think this patch should be changing it. There may be a case for just setting the others to mtime instead of SDE too? > I've checked the behaviour under multiple conditions, double checking > that I've set as much reproducible stuff as possible, and it comes > out that in certain circumstances rootfs files have incorrect > timestamps -- they're set to the build process time. Which timestamps though? Isn't mtime ok? > So I'm sure that if the user set SDE to a particular timestamp, we > should also modify atime, ctime and crtime. > > P.S. overriding only the time later than SDE could have also been an > option, but in reality we can not surely gather stat info for each > file without debugfs or similar tools and this will tremendously > degradate performance of already pretty slow image creation process. > Due to this I suggest the patch I've sent. That stat information should already be present? Cheers, Richard
On Thu, 2022-11-03 at 21:51 +0000, Richard Purdie wrote: > «Внимание! Данное письмо от внешнего адресата!» > > On Thu, 2022-11-03 at 18:53 +0000, Sergey Zhmylev wrote: > > Hi Richard, > > > > Thank you for the comment! > > Well, the environment described in reproducible guides currently > > does > > not provide binary reproducability due to extfs implementation. > > Agreed, there is an issue here. I just don't think the patch looks > quite right. > > > Moreover, building on some FS (for example mounted with noatime or > > not supporting crtime field at all like UFS) makes the extfs > > creation > > process totally not reproducible in spite of any modifications on > > the > > source files. > > You're arguing we need to set crtime, ctime and atime which I can > believe. The mtime however should be fine from the package manager so > I > don't think this patch should be changing it. > > There may be a case for just setting the others to mtime instead of > SDE > too? Unfortunately, there is no other way to properly get mtime of the files in order to clone it into atime/ctime/crtime, than create a large listing of the files and ask debugfs to print stat information, and then parse the long output creating a debugfs script. This process can lead to additional bugs and will take a time. > > > I've checked the behaviour under multiple conditions, double > > checking > > that I've set as much reproducible stuff as possible, and it comes > > out that in certain circumstances rootfs files have incorrect > > timestamps -- they're set to the build process time. > > Which timestamps though? Isn't mtime ok? mtime "mostly" looks ok, while the others were definitely bad. There were several files, not only fstab before my previous patch, but also rpmdb.sqlite and some dnf-related stuff, and so on, on which mtime was set improperly anyways. According to https://reproducible-builds.org/docs/source-date-epoch/ if the user specified SDE, the "last modification of something" (aka mtime) must be set to this value. Taking all the above into account, there is nothing bad in changing all the inodes timestamps: mtime (SDE requirement) and atime/ctime/crtime (to overcome extfs building issues) to the same value -- specified by the user. > > > So I'm sure that if the user set SDE to a particular timestamp, we > > should also modify atime, ctime and crtime. > > > > P.S. overriding only the time later than SDE could have also been > > an > > option, but in reality we can not surely gather stat info for each > > file without debugfs or similar tools and this will tremendously > > degradate performance of already pretty slow image creation > > process. > > Due to this I suggest the patch I've sent. > > That stat information should already be present? Right, but as I said, it's pretty tricky to obtain it for later use. > > Cheers, > > Richard > Thank you, Richard -- With best wishes, Sergei Zhmylev Engineering consultant OS development department
P.S. Richard, you can easily check it on yours system. Just make a clean build of something with all the reproducible stuff set and inspect the resulting FS image. Even with a very tiny core image (linux + bash + coreutils) there are several inodes with improper timestamps: $ /sbin/debugfs -f cmd ./platform.img 2>/dev/null |grep -e '^Inode:' -e 'mtime' |paste - - |grep 2022$ Inode: 1 Type: bad type Mode: 0000 Flags: 0x0 mtime: 0x6364f673 -- Fri Nov 4 14:24:35 2022 Inode: 2 Type: directory Mode: 0755 Flags: 0x80000 mtime: 0x6364f673:00000000 -- Fri Nov 4 14:24:35 2022 Inode: 7 Type: regular Mode: 0600 Flags: 0x0 mtime: 0x6364f673:00000000 -- Fri Nov 4 14:24:35 2022 Inode: 8 Type: regular Mode: 0600 Flags: 0x80000 mtime: 0x6364f673:00000000 -- Fri Nov 4 14:24:35 2022 Inode: 11 Type: directory Mode: 0700 Flags: 0x80000 mtime: 0x6364f673:00000000 -- Fri Nov 4 14:24:35 2022 Inode: 123 Type: regular Mode: 0664 Flags: 0x80000 mtime: 0x6364f677:00000000 -- Fri Nov 4 14:24:39 2022 cmd is just a command file of "stat <123>" lines from 1 to 20000. -- With best wishes, Sergei Zhmylev Engineering consultant OS development department On Fri, 2022-11-04 at 11:16 +0000, Sergey Zhmylev wrote: > On Thu, 2022-11-03 at 21:51 +0000, Richard Purdie wrote: > > > > On Thu, 2022-11-03 at 18:53 +0000, Sergey Zhmylev wrote: > > > Hi Richard, > > > > > > Thank you for the comment! > > > Well, the environment described in reproducible guides currently > > > does > > > not provide binary reproducability due to extfs implementation. > > > > Agreed, there is an issue here. I just don't think the patch looks > > quite right. > > > > > Moreover, building on some FS (for example mounted with noatime > > > or > > > not supporting crtime field at all like UFS) makes the extfs > > > creation > > > process totally not reproducible in spite of any modifications on > > > the > > > source files. > > > > You're arguing we need to set crtime, ctime and atime which I can > > believe. The mtime however should be fine from the package manager > > so > > I > > don't think this patch should be changing it. > > > > There may be a case for just setting the others to mtime instead of > > SDE > > too? > > Unfortunately, there is no other way to properly get mtime of the > files > in order to clone it into atime/ctime/crtime, than create a large > listing of the files and ask debugfs to print stat information, and > then parse the long output creating a debugfs script. > This process can lead to additional bugs and will take a time. > > > > > > I've checked the behaviour under multiple conditions, double > > > checking > > > that I've set as much reproducible stuff as possible, and it > > > comes > > > out that in certain circumstances rootfs files have incorrect > > > timestamps -- they're set to the build process time. > > > > Which timestamps though? Isn't mtime ok? > > mtime "mostly" looks ok, while the others were definitely bad. > There were several files, not only fstab before my previous patch, > but > also rpmdb.sqlite and some dnf-related stuff, and so on, on which > mtime > was set improperly anyways. > > According to https://reproducible-builds.org/docs/source-date- > epoch/ if > the user specified SDE, the "last modification of something" (aka > mtime) must be set to this value. > > Taking all the above into account, there is nothing bad in changing > all > the inodes timestamps: mtime (SDE requirement) and atime/ctime/crtime > (to overcome extfs building issues) to the same value -- specified by > the user. > > > > > > So I'm sure that if the user set SDE to a particular timestamp, > > > we > > > should also modify atime, ctime and crtime. > > > > > > P.S. overriding only the time later than SDE could have also been > > > an > > > option, but in reality we can not surely gather stat info for > > > each > > > file without debugfs or similar tools and this will tremendously > > > degradate performance of already pretty slow image creation > > > process. > > > Due to this I suggest the patch I've sent. > > > > That stat information should already be present? > > Right, but as I said, it's pretty tricky to obtain it for later use. > > > > > Cheers, > > > > Richard > > > > Thank you, Richard > > -- > With best wishes, > Sergei Zhmylev > Engineering consultant > OS development department > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#172708): > https://lists.openembedded.org/g/openembedded-core/message/172708 > Mute This Topic: https://lists.openembedded.org/mt/94758789/7220546 > Group Owner: openembedded-core+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/openembedded- > core/unsub [s.zhmylev@yadro.com] > -=-=-=-=-=-=-=-=-=-=-=- >
On Fri, 2022-11-04 at 11:16 +0000, Sergey Zhmylev wrote: > On Thu, 2022-11-03 at 21:51 +0000, Richard Purdie wrote: > > «Внимание! Данное письмо от внешнего адресата!» > > > > On Thu, 2022-11-03 at 18:53 +0000, Sergey Zhmylev wrote: > > > Hi Richard, > > > > > > Thank you for the comment! > > > Well, the environment described in reproducible guides currently > > > does > > > not provide binary reproducability due to extfs implementation. > > > > Agreed, there is an issue here. I just don't think the patch looks > > quite right. > > > > > Moreover, building on some FS (for example mounted with noatime or > > > not supporting crtime field at all like UFS) makes the extfs > > > creation > > > process totally not reproducible in spite of any modifications on > > > the > > > source files. > > > > You're arguing we need to set crtime, ctime and atime which I can > > believe. The mtime however should be fine from the package manager so > > I > > don't think this patch should be changing it. > > > > There may be a case for just setting the others to mtime instead of > > SDE > > too? > > Unfortunately, there is no other way to properly get mtime of the files > in order to clone it into atime/ctime/crtime, than create a large > listing of the files and ask debugfs to print stat information, and > then parse the long output creating a debugfs script. > This process can lead to additional bugs and will take a time. What about adding a new command to debugfs which basically does this for us? There are two options I can see, one to clone the fs info, the other could be a clamp operation, process all the files and change any file with a timestamp later than SDE, to SDE. I'd imagine upstream could understand a usecase for this. > > > I've checked the behaviour under multiple conditions, double > > > checking > > > that I've set as much reproducible stuff as possible, and it comes > > > out that in certain circumstances rootfs files have incorrect > > > timestamps -- they're set to the build process time. > > > > Which timestamps though? Isn't mtime ok? > > mtime "mostly" looks ok, while the others were definitely bad. > There were several files, not only fstab before my previous patch, but > also rpmdb.sqlite and some dnf-related stuff, and so on, on which mtime > was set improperly anyways. > > According to https://reproducible-builds.org/docs/source-date-epoch/ if > the user specified SDE, the "last modification of something" (aka > mtime) must be set to this value. That link is a guideline, not a hard requirement. We go to quite some lengths to preserve file timestamps throughout our build process so I don't feel that destroying them all when creating the filesystem is a particularly great solution. I'd like to see mtime preserved and only values greater than SDE being clamped. > Taking all the above into account, there is nothing bad in changing all > the inodes timestamps: mtime (SDE requirement) and atime/ctime/crtime > (to overcome extfs building issues) to the same value -- specified by > the user. Yes, there is. It isn't what the package managers created or expected and the timestamps don't match the packages. SDE is a tool but the reproducible builds people would not advocate for setting all timestamps to SDE, only those that otherwise wouldn't be reproducible. > > > > > So I'm sure that if the user set SDE to a particular timestamp, we > > > should also modify atime, ctime and crtime. > > > > > > P.S. overriding only the time later than SDE could have also been > > > an > > > option, but in reality we can not surely gather stat info for each > > > file without debugfs or similar tools and this will tremendously > > > degradate performance of already pretty slow image creation > > > process. > > > Due to this I suggest the patch I've sent. > > > > That stat information should already be present? > > Right, but as I said, it's pretty tricky to obtain it for later use. I think we should think about a slightly different approach with more involvement with the tools. It may be worth talking to upstream to see what may or may not be acceptable. In particular, I don't want to destroy all the mtime data. Cheers, Richard
diff --git a/scripts/lib/wic/partition.py b/scripts/lib/wic/partition.py index bc889bdeb9..b7965267cb 100644 --- a/scripts/lib/wic/partition.py +++ b/scripts/lib/wic/partition.py @@ -294,17 +294,34 @@ class Partition(): f.write("cd etc\n") f.write("rm fstab\n") f.write("write %s fstab\n" % (self.updated_fstab_path)) - if os.getenv('SOURCE_DATE_EPOCH'): - fstab_time = int(os.getenv('SOURCE_DATE_EPOCH')) - for time in ["atime", "mtime", "ctime"]: - f.write("set_inode_field fstab %s %s\n" % (time, hex(fstab_time))) - f.write("set_inode_field fstab %s_extra 0\n" % (time)) debugfs_cmd = "debugfs -w -f %s %s" % (debugfs_script_path, rootfs) exec_native_cmd(debugfs_cmd, native_sysroot) mkfs_cmd = "fsck.%s -pvfD %s" % (self.fstype, rootfs) exec_native_cmd(mkfs_cmd, native_sysroot, pseudo=pseudo) + if os.getenv('SOURCE_DATE_EPOCH'): + sde_time = hex(int(os.getenv('SOURCE_DATE_EPOCH'))) + debugfs_script_path = os.path.join(cr_workdir, "debugfs_script") + files = [] + for root, dirs, others in os.walk(rootfs_dir): + base = root.replace(rootfs_dir, "").rstrip(os.sep) + files += [ "/" if base == "" else base ] + files += [ base + "/" + n for n in dirs + others ] + with open(debugfs_script_path, "w") as f: + f.write("set_current_time %s\n" % (sde_time)) + f.write("set_super_value hash_seed %s\n" % (self.fsuuid)) + for file in set(files): + for time in ["atime", "mtime", "ctime", "crtime"]: + f.write("set_inode_field \"%s\" %s %s\n" % (file, time, sde_time)) + f.write("set_inode_field \"%s\" %s_extra 0\n" % (file, time)) + for time in ["wtime", "mkfs_time", "lastcheck"]: + f.write("set_super_value %s %s\n" % (time, sde_time)) + for time in ["mtime", "first_error_time", "last_error_time"]: + f.write("set_super_value %s 0\n" % (time)) + debugfs_cmd = "debugfs -w -f %s %s" % (debugfs_script_path, rootfs) + exec_native_cmd(debugfs_cmd, native_sysroot) + self.check_for_Y2038_problem(rootfs, native_sysroot) def prepare_rootfs_btrfs(self, rootfs, cr_workdir, oe_builddir, rootfs_dir,