标签归档:asm

Simulate the RMAN compressed backup and recovery procedure

I built a local db asmsid with database on ASM diskgroup(+DATA) and rman compressed backupset on staging file system mount point (/home/oracle/rman/asmsid).
The experiment verified that: No extra uncompression steps are required during recovery if you use RMAN’s integrated compression.

the rman_compressed_backup_recovery_procedure.sql files for the detail testing procedure. I have some activity for post verify the recovery result, destroy the database by removing the controlfile and system datafile and user datafile. The using below script for database recovery.

The Final recovery script and log, with “restore database” and “recover database”, it turn out that Rman will find and uncompress the datafile and archivelog for database recovery Automatically.

--restore & recover whole database
run{
set controlfile autobackup format for device type disk to '/home/rman/asmsid/asmsid_control_%F';
allocate channel c1 type disk format '/home/rman/asmsid/asmsid_full_%T_%U';
allocate channel c2 type disk format '/home/rman/asmsid/asmsid_full_%T_%U';
allocate channel c3 type disk format '/home/rman/asmsid/asmsid_full_%T_%U';
restore database;
recover database;
}
 
--log
allocated channel: c1
channel c1: sid=159 devtype=DISK
 
allocated channel: c2
channel c2: sid=151 devtype=DISK
 
allocated channel: c3
channel c3: sid=150 devtype=DISK
 
Starting restore at 04-JUN-10
 
channel c1: starting datafile backupset restore
channel c1: specifying datafile(s) to restore from backup set
restoring datafile 00002 to +DATA/asmsid/datafile/undotbs1.265.720650145
restoring datafile 00004 to +DATA/asmsid/datafile/users.263.720650145
channel c1: reading from backup piece /home/rman/asmsid/asmsid_full_20100604_09lfconv_1_1
channel c2: starting datafile backupset restore
channel c2: specifying datafile(s) to restore from backup set
restoring datafile 00003 to +DATA/asmsid/datafile/sysaux.262.720650145
restoring datafile 00005 to +DATA/asmsid/datafile/sidts.dbf
channel c2: reading from backup piece /home/rman/asmsid/asmsid_full_20100604_0alfconv_1_1
channel c3: starting datafile backupset restore
channel c3: specifying datafile(s) to restore from backup set
restoring datafile 00001 to +DATA/asmsid/datafile/system.267.720650145
channel c3: reading from backup piece /home/rman/asmsid/asmsid_full_20100604_08lfconv_1_1
channel c1: restored backup piece 1
piece handle=/home/rman/asmsid/asmsid_full_20100604_09lfconv_1_1 tag=TAG20100604T113110
channel c1: restore complete, elapsed time: 00:00:25
channel c2: restored backup piece 1
piece handle=/home/rman/asmsid/asmsid_full_20100604_0alfconv_1_1 tag=TAG20100604T113110
channel c2: restore complete, elapsed time: 00:00:25
channel c3: restored backup piece 1
piece handle=/home/rman/asmsid/asmsid_full_20100604_08lfconv_1_1 tag=TAG20100604T113110
channel c3: restore complete, elapsed time: 00:01:00
Finished restore at 04-JUN-10
 
Starting recover at 04-JUN-10
 
starting media recovery
 
archive log thread 1 sequence 28 is already on disk as file +DATA/asmsid/onlinelog/group_1.264.720657717
archive log thread 1 sequence 29 is already on disk as file +DATA/asmsid/onlinelog/group_2.260.720657723
archive log thread 1 sequence 30 is already on disk as file +DATA/asmsid/onlinelog/group_4.256.720657711
channel c1: starting archive log restore to default destination
channel c1: restoring archive log
archive log thread=1 sequence=26
channel c1: restoring archive log
archive log thread=1 sequence=27
channel c1: reading from backup piece /home/rman/asmsid/asmsid_arch_20100604_0dlfcvkp_1_1
channel c1: restored backup piece 1
piece handle=/home/rman/asmsid/asmsid_arch_20100604_0dlfcvkp_1_1 tag=TAG20100604T132857
channel c1: restore complete, elapsed time: 00:00:03
archive log filename=+DATA/archivelog/1_26_720648477.dbf thread=1 sequence=26
archive log filename=+DATA/archivelog/1_27_720648477.dbf thread=1 sequence=27
archive log filename=+DATA/asmsid/onlinelog/group_1.264.720657717 thread=1 sequence=28
archive log filename=+DATA/asmsid/onlinelog/group_2.260.720657723 thread=1 sequence=29
archive log filename=+DATA/asmsid/onlinelog/group_4.256.720657711 thread=1 sequence=30
media recovery complete, elapsed time: 00:00:05
Finished recover at 04-JUN-10
released channel: c1
released channel: c2
released channel: c3

the detail procedure(rman_compressed_backup_recovery_procedure.sql) is as below:

-- 1 create user sid to verify backup and recovery (refer to: Sid_Activity_Before_Recover.lst)
create tablespace sidts datafile '+DATA/asmsid/datafile/sidts.dbf' size 100m;
create user sid identified by sid default tablespace sidts;
grant connect,resource,dba to sid;
conn sid/sid
create table t1 as select * from all_objects where rownum < 100;

-- 2 clear the archive log and take whole database backup with compressed (refre to:rman_backup.log)

run {
allocate channel c1 type disk;
crosscheck archivelog all;
delete noprompt archivelog all;
}

run{
configure controlfile autobackup on;
configure controlfile autobackup format for device type disk to '/home/rman/asmsid/asmsid_control_%F';
allocate channel c1 type disk format '/home/rman/asmsid/asmsid_full_%T_%U' maxpiecesize 1G;
allocate channel c2 type disk format '/home/rman/asmsid/asmsid_full_%T_%U' maxpiecesize 1G;
allocate channel c3 type disk format '/home/rman/asmsid/asmsid_full_%T_%U' maxpiecesize 1G;
backup as compressed backupset database plus archivelog;
}

-- 3 create table t2 using sid and make the related redo is archived (refer to:Sid_Activity_Before_Recover.lst)
create table t2 as select * from all_objects where rownum < 100;
alter system switch logfile;
alter system checkpoint;


-- 4 backup the archivelog with compressed to filesystem. (refer to:rman_backup.log)
run{
configure controlfile autobackup on;
configure controlfile autobackup format for device type disk to '/home/rman/asmsid/asmsid_control_%F';
allocate channel c1 type disk format '/home/rman/asmsid/asmsid_arch_%T_%U' maxpiecesize 1G;
allocate channel c2 type disk format '/home/rman/asmsid/asmsid_arch_%T_%U' maxpiecesize 1G;
backup as compressed backupset archivelog all delete all input;
}


-- 5 create table t3 and archive the current redo log
create table t3 as select * from all_objects where rownum < 100;
alter system switch logfile;
alter system checkpoint;

-- 6 create table t4 using sid and run dbinfo to get the lastest checkpoint# (refer to: Sid_Activity_Before_Recover.lst)
create table t4 as select * from all_objects where rownum < 100;

-- 7 shutdown database

destroy the database (refer to: destroy.log)
delete the control file/system datafile/sidts datafile

-- 8 recover refer to rman_recover.log
-- 8.1 restore control
set dbid 4138627741
run{
set controlfile autobackup format for device type disk to '/home/rman/asmsid/asmsid_control_%F';
restore controlfile from autobackup;
}

alter database mount;

-- 8.2 restrore & recover whole database
run{
set controlfile autobackup format for device type disk to '/home/rman/asmsid/asmsid_control_%F';
allocate channel c1 type disk format '/home/rman/asmsid/asmsid_full_%T_%U';
allocate channel c2 type disk format '/home/rman/asmsid/asmsid_full_%T_%U';
allocate channel c3 type disk format '/home/rman/asmsid/asmsid_full_%T_%U';
restore database;
recover database;
}

-- 9 verify: run dbinfo to get the lastest checkpoint# and count(*) on t3 (refer to post_verify.sql)

create the folder for log_archive_dest in ASM

今天在做实验的时候”switch logfile”的时候系统Hang住了.

sid@ASMSID> alter system switch logfile;

因为之前发现Migrate到ASM后会自动创建controlfile/datafile/onlinelog这些folder, 主观认为Oracle也会去创建+DATA/archivelog.

sys@ASMSID> archive log list
Database log mode              Archive Mode
Automatic archival             Enabled
Archive destination            +DATA/archivelog
Oldest online log sequence     26
Next log sequence to archive   28
Current log sequence           28

所以一开始没有怀疑后台arch进程.于是做了hanganalyze和systemstate之后发现”switch logfile”还是hang在后台lgwr进程.
hanganalyze中的Open chains found, 166是lgwr的session id.

    <0/166/1/0x2c62a904/21630/rdbms ipc message>
 -- <0/140/715/0x2c6310cc/20268/switch logfile command>

于是去查backgroup_dump_dest中的arch0的trace file, 猜想原因就在log_archive_dest没有创建.手动创建+data/archivelog之后很快3个redo group都archive了, “switch log file”也完成了.

*** 2010-06-04 11:19:54.793
*** SERVICE NAME:(SYS$BACKGROUND) 2010-06-04 11:19:54.777
*** SESSION ID:(157.10) 2010-06-04 11:19:54.777
Error with archive destination 'LOG_ARCHIVE_DEST_1'
*** 2010-06-04 11:19:54.831 22384 kcrr.c
ORA-16038: log 1 sequence# 25 cannot be archived
ORA-00254: error in archive control string ''
ORA-00312: online log 1 thread 1: '+DATA/asmsid/onlinelog/group_1.264.720657717'
ORA-15173: entry 'archivelog' does not exist in directory '/'
*** 2010-06-04 11:21:25.397
Error with archive destination 'LOG_ARCHIVE_DEST_1'
*** 2010-06-04 11:21:25.398 22384 kcrr.c

Ref:Interpreting HANGANALYZE trace files to diagnose hanging and performance problems [ID 215858.1].htm

Migration of a Database to ASM

记录一下Migration之前碰到的问题:

1. 昨天用oracleasm安装asm_home的版本是10.2.0.1, db instance认不到asm diskgroup, 升级asm_home到10.2.0.5.0, 和ORACLE_HOME一样的版本
2. 在asm diskgroup上创建tablespace出现以下错误,

create tablespace sidtest datafile '+DATA/asmtest_01.dbf' size 10m
*
ERROR at line 1:
ORA-01119: error in creating database file '+DATA/asmtest_01.dbf'
ORA-15012: ASM file 'asmtest_01.dbf' does not exist
ORA-17502: ksfdcre:5 Failed to create file +DATA/asmtest_01.dbf
ORA-15081: failed to submit an I/O operation to a disk

3. 怀疑oracle用户对/dev/raw/中的裸设备没有读写权限,

[oracleasm@CS-RHEL raw]$ ls -l
total 0
crw------- 1 oracleasm asmdba 162, 1 Jun  2 17:05 raw1
crw------- 1 oracleasm asmdba 162, 2 Jun  2 17:04 raw2
crw------- 1 oracleasm asmdba 162, 3 Jun  2 17:04 raw3
crw------- 1 oracleasm asmdba 162, 4 Jun  2 17:04 raw4

[oracleasm@CS-RHEL raw]$ chmod a+rw *

[oracleasm@CS-RHEL raw]$ ls -l
total 0
crw-rw-rw- 1 oracleasm asmdba 162, 1 Jun  2 17:24 raw1
crw-rw-rw- 1 oracleasm asmdba 162, 2 Jun  2 17:19 raw2
crw-rw-rw- 1 oracleasm asmdba 162, 3 Jun  2 17:19 raw3
crw-rw-rw- 1 oracleasm asmdba 162, 4 Jun  2 17:19 raw4

4. 重启asm instance, create tablespace依然失败,这次是600错误

idle> create tablespace sidtest datafile '+DATA/asmtest_01.dbf' size 10m;

ERROR at line 1:
ORA-00600: internal error code, arguments: [kfmsFindGrp00], [Unknown error in NM], [], [], [], [],
[], []

5. 重启database instance, tablespace 创建成功. asm diskgroup终于可用了.

Migrate 用到的脚本:

重启instance

SHUTDOWN IMMEDIATE
startup mount

backup datafile copy到ASM Diskgroup

CONFIGURE DEVICE TYPE DISK PARALLELISM 4;
BACKUP AS COPY DATABASE FORMAT '+DATA';

backup spfile 到ASM Diskgroup

run {
   BACKUP AS BACKUPSET SPFILE;
   RESTORE SPFILE TO "+DATA/spfile";
}

新建pfile指向+DATA上面的spfile

/tmp/pfile.ora
SPFILE=+DATA/spfile

重启instance, 设置新的controlfile位置

SHUTDOWN IMMEDIATE
startup nomount pfile='/tmp/pfile.ora'
alter system set control_file='+DATA/ct1.f' scope=spfile sid='*';

重启instance, restore controlfile到+DATA, switch database 到diskgroug上, 直接open database, 不用recover database;

SHUTDOWN IMMEDIATE
startup nomount pfile='/tmp/pfile.ora'
restore control file from '/home/u02/app/oracle/product/11.1.0/db_1/oradata/SIDGD/control01.ctl';
alter database mount;
switch database to copy;
alter database open;

最后, 将pfile搬到$ORACLE_HOME/dbs, 这样Oracle每次启动都可以识别到+DATA/spfile

[oracle@CS-RHEL dbs]$ mv /tmp/pfile.ora initasmsid.ora

Ref:
10R1: http://download.oracle.com/docs/cd/B13789_01/server.101/b10734/rcmasm.htm
10R2: http://download.oracle.com/docs/cd/B19306_01/backup.102/b14191/rcmasm.htm#BRADV12121 (利用increment backup, 减少停机时间)

PS: 最后记得把online redo log 和temp tablespace Migrate到ASK上面

declare
   cursor rlc is
      select group# grp, thread# thr, bytes/1024 bytes_k, 'NO' srl
        from v$log
      union
      select group# grp, thread# thr, bytes/1024 bytes_k, 'YES' srl
        from v$standby_log
      order by 1;
   stmt     varchar2(2048);
   swtstmt  varchar2(1024) := 'alter system switch logfile';
   ckpstmt  varchar2(1024) := 'alter system checkpoint global';
begin
   for rlcRec in rlc loop
      if (rlcRec.srl = 'YES') then
         stmt := 'alter database add standby logfile thread ' ||
                 rlcRec.thr || ' ''+DATA'' size ' || 
                 rlcRec.bytes_k || 'K';
         execute immediate stmt;
         stmt := 'alter database drop standby logfile group ' || rlcRec.grp;
         execute immediate stmt;
      else
         stmt := 'alter database add logfile thread ' ||
                 rlcRec.thr || ' ''+DATA'' size ' ||  
                 rlcRec.bytes_k || 'K';
         execute immediate stmt;
         begin
            stmt := 'alter database drop logfile group ' || rlcRec.grp;
            dbms_output.put_line(stmt);
            execute immediate stmt;
         exception
            when others then
               execute immediate swtstmt;
               execute immediate ckpstmt;
               execute immediate stmt;
         end;
      end if;
   end loop;
end;

*
ERROR at line 1:
ORA-00350: log 3 of instance asmsid (thread 1) needs to be archived
ORA-00312: online log 3 thread 1:
'/home/u02/app/oracle/product/11.1.0/db_1/oradata/asmsid/redo03.log'
ORA-06512: at line 34
ORA-01623: log 3 is current log for instance asmsid (thread 1) - cannot drop
ORA-00312: online log 3 thread 1:
'/home/u02/app/oracle/product/11.1.0/db_1/oradata/asmsid/redo03.log'


sys@ASMSID> alter system archive log sequence 24 to '/home/rman/asmsid/';

System altered.
sys@ASMSID> alter database drop logfile group 3;
Database altered.

idle> @log

    GROUP#    THREAD#  SEQUENCE#   FIRST_CHANGE# STATUS           ARC      BYTES
---------- ---------- ---------- --------------- ---------------- --- ----------
MEMBER
------------------------------------------------------------
         1          1         25          337049 CURRENT          NO    52428800
+DATA/asmsid/onlinelog/group_1.264.720657717

         2          1          0               0 UNUSED           YES   52428800
+DATA/asmsid/onlinelog/group_2.260.720657723

         4          1          0               0 UNUSED           YES   52428800
+DATA/asmsid/onlinelog/group_4.256.720657711


RMAN> run {
2> set newname for tempfile 1 to '+DATA';
3> switch tempfile all;
4> }

关于uniform size和autoallocate的奇怪问题

用unifom size 创建一个1g的tablespace, resize到20G之后它还会有一个100m的datafile,真奇怪啊。用google和metalink都没找到相关的解释,先记录一下

sys@CS2DB> create tablespace b2b_test datafile
  2  '+DISKGROUP1/CS2DB/DATAFILE/b2b_test_01.dbf' size 1g,
  3  SEGMENT SPACE MANAGEMENT AUTO
  4  EXTENT MANAGEMENT LOCAL
UNIFORM SIZE 1m;
  5

Tablespace created.

sys@CS2DB> sys@CS2DB> alter database datafile '+DISKGROUP1/CS2DB/DATAFILE/b2b_test_01.dbf' resize 20000m;

Database altered.

sys@CS2DB> @ls b2b_test
old   9:     (select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_data_files where upper(tablespace_name) like upper('%&1%')
new   9:     (select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_data_files where upper(tablespace_name) like upper('%b2b_test%')
old  11:      select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_temp_files where upper(tablespace_name) like upper('%&1%')
new  11:      select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_temp_files where upper(tablespace_name) like upper('%b2b_test%')

TABLESPACE_NAME                   FILE_ID FILE_NAME                                                                        EXT         MB      MAXSZ
------------------------------ ---------- -------------------------------------------------------------------------------- --- ---------- ----------
B2B_TEST                                4 +DISKGROUP1/cs2db/datafile/b2b_test.263.706100349                                YES        100   32767.98
B2B_TEST                                2 +DISKGROUP1/cs2db/datafile/b2b_test_01.dbf                                       NO       20000

sys@CS2DB> select dbms_metadata.get_ddl('TABLESPACE','B2B_TEST') from dual;

DBMS_METADATA.GET_DDL('TABLESPACE','B2B_TEST')
--------------------------------------------------------------------------------

  CREATE TABLESPACE "B2B_TEST" DATAFILE
  '+DISKGROUP1/cs2db/datafile/b2b_test_01.dbf' SIZE 1073741824,
  '+DISKGROUP1/cs2db/datafile/b2b_test.263.706100349' SIZE 104857600
  AUTOEXTEND ON NEXT 104857600 MAXSIZE 32767M
  LOGGING ONLINE PERMANENT BLOCKSIZE 8192
  EXTENT MANAGEMENT LOCAL UNIFORM SIZE 1048576 SEGMENT SPACE MANAGEMENT AUTO
   ALTER DATABASE DATAFILE
  '+DISKGROUP1/cs2db/datafile/b2b_test_01.dbf' RESIZE 20971520000


用autoallocate就没有这个问题

sys@CS2DB> create tablespace b2b_test datafile
  2  '+DISKGROUP1/CS2DB/DATAFILE/b2b_test_01.dbf' size 1g;

Tablespace created.

sys@CS2DB> alter database datafile '+DISKGROUP1/CS2DB/DATAFILE/b2b_test_01.dbf' resize 20000m;

Database altered.

sys@CS2DB> @ls b2b_test
old   9:     (select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_data_files where upper(tablespace_name) like upper('%&1%')
new   9:     (select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_data_files where upper(tablespace_name) like upper('%b2b_test%')
old  11:      select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_temp_files where upper(tablespace_name) like upper('%&1%')
new  11:      select tablespace_name, file_id, file_name, autoextensible, bytes, maxbytes from dba_temp_files where upper(tablespace_name) like upper('%b2b_test%')

TABLESPACE_NAME                   FILE_ID FILE_NAME                                                                        EXT         MB      MAXSZ
------------------------------ ---------- -------------------------------------------------------------------------------- --- ---------- ----------
B2B_TEST                                2 +DISKGROUP1/cs2db/datafile/b2b_test_01.dbf                                       NO       20000

sys@CS2DB> select dbms_metadata.get_ddl('TABLESPACE','B2B_TEST') from dual;

DBMS_METADATA.GET_DDL('TABLESPACE','B2B_TEST')
--------------------------------------------------------------------------------

  CREATE TABLESPACE "B2B_TEST" DATAFILE
  '+DISKGROUP1/cs2db/datafile/b2b_test_01.dbf' SIZE 1073741824
  LOGGING ONLINE PERMANENT BLOCKSIZE 8192
  EXTENT MANAGEMENT LOCAL AUTOALLOCATE SEGMENT SPACE MANAGEMENT AUTO
   ALTER DATABASE DATAFILE
  '+DISKGROUP1/cs2db/datafile/b2b_test_01.dbf' RESIZE 20971520000